From d9bba621c8c68ae60876905c76ef613ca6a72d8e Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Fri, 17 Aug 2007 19:35:40 +0100 Subject: [PATCH 01/84] Imported latest versions from Etherboot 5.4 --- src/drivers/net/mlx_ipoib/cmdif_mt25218.c | 51 ++++++++++++--- .../net/mlx_ipoib/doc/README.boot_over_ib | 20 +++--- src/drivers/net/mlx_ipoib/ib_driver.c | 10 ++- src/drivers/net/mlx_ipoib/ib_mt23108.c | 16 ++++- src/drivers/net/mlx_ipoib/ib_mt25218.c | 32 +++++++++- src/drivers/net/mlx_ipoib/ipoib.c | 62 +------------------ src/drivers/net/mlx_ipoib/mt23108.c | 6 +- src/drivers/net/mlx_ipoib/mt25218.c | 6 +- src/drivers/net/mlx_ipoib/mt25218.h | 2 + src/drivers/net/mlx_ipoib/mt_version.c | 2 +- src/drivers/net/mlx_ipoib/patches/dhcpd.patch | 18 ++---- 11 files changed, 122 insertions(+), 103 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/cmdif_mt25218.c b/src/drivers/net/mlx_ipoib/cmdif_mt25218.c index fb95edbe..704fb1fc 100644 --- a/src/drivers/net/mlx_ipoib/cmdif_mt25218.c +++ b/src/drivers/net/mlx_ipoib/cmdif_mt25218.c @@ -22,14 +22,6 @@ #include "cmdif_priv.h" #include "mt25218.h" -/* - * cmd_sys_dis - */ -static int cmd_sys_dis(void) -{ - return 0; -} - /* * cmd_write_mgm */ @@ -325,6 +317,24 @@ static int cmd_map_icm_aux(struct map_icm_st *map_icm_aux_p) return rc; } + +/* + * cmd_unmap_icm_aux + */ +static int cmd_unmap_icm_aux(void) +{ + int rc; + command_fields_t cmd_desc; + + memset(&cmd_desc, 0, sizeof cmd_desc); + + cmd_desc.opcode = MEMFREE_CMD_UNMAP_ICM_AUX; + + rc = cmd_invoke(&cmd_desc); + + return rc; +} + /* * cmd_map_icm */ @@ -371,6 +381,31 @@ static int cmd_map_icm(struct map_icm_st *map_icm_p) return rc; } + + +/* + * cmd_unmap_icm + */ +static int cmd_unmap_icm(struct map_icm_st *map_icm_p) +{ + int rc; + command_fields_t cmd_desc; + __u32 iprm[2]; + + memset(&cmd_desc, 0, sizeof cmd_desc); + + cmd_desc.opcode = MEMFREE_CMD_UNMAP_ICM; + iprm[0] = map_icm_p->vpm_arr[0].va_h; + iprm[1] = map_icm_p->vpm_arr[0].va_l; + cmd_desc.in_param = iprm; + cmd_desc.in_trans = TRANS_IMMEDIATE; + cmd_desc.input_modifier = 1 << map_icm_p->vpm_arr[0].log2_size; + + rc = cmd_invoke(&cmd_desc); + + return rc; +} + /* * cmd_query_dev_lim */ diff --git a/src/drivers/net/mlx_ipoib/doc/README.boot_over_ib b/src/drivers/net/mlx_ipoib/doc/README.boot_over_ib index 07738628..062abd3f 100644 --- a/src/drivers/net/mlx_ipoib/doc/README.boot_over_ib +++ b/src/drivers/net/mlx_ipoib/doc/README.boot_over_ib @@ -92,13 +92,11 @@ files with .mlx extension also available from Mellanox's web site. 6. Preparing the DHCP Server ----------------------------- -DHCP messages over IP Over IB are transmitted as broadcasts. In order to -distinguish between messages belonging to a certain DHCP session, the messages -must carry the client identifier option (see ietf documentation referred to -above). As of November 2005, ISC DHCP servers do not support this feature. -They are expected to support this at the end of 2005. In order to work this -out, the appropriate patch must be applied (see patches directory). It has -been tested on version isc-dhcpd-V3.0.4b2. +The DHCP server may need to be modified in order to work on IPOIB. Some +distributuions alreay support this (Some SUSE distributuions) while others +do not. If the pre-installed server does not support IPOIB, the user can download +the sources from ISC http://www.isc.org/ and apply the appropriate patch in +the patches directory. The DHCP server must run on a machine which supports IP Over IB. 
The Mellanox IBGD package (gen1 or gen2) can be used to provide this. @@ -171,6 +169,14 @@ PXE_IB_PORT. 14. Installing a package from Mellanox -------------------------------------- +The package comes as a compressed file with extension .bz2 or .gz. Follow +these steps: +1. Create a directory +2. cd to this directory +3. tar jxf for .bz2 files or + tar zxf for .gz files + +The binaries can be found under src/bin When using a package obtained from Mellanox Technologies' web site, the directory src/bin will contain the driver binary files. The files have a .bin extension and are equivalent to the same files with .zrom extension. diff --git a/src/drivers/net/mlx_ipoib/ib_driver.c b/src/drivers/net/mlx_ipoib/ib_driver.c index a46db7fc..a3015ba2 100644 --- a/src/drivers/net/mlx_ipoib/ib_driver.c +++ b/src/drivers/net/mlx_ipoib/ib_driver.c @@ -248,12 +248,10 @@ static int ib_driver_close(int fw_fatal) ret = 1; } - if (!fw_fatal) { - rc = cmd_sys_dis(); - if (rc) { - eprintf(""); - ret = 1; - } + rc = unset_hca(); + if (rc) { + eprintf(""); + ret = 1; } return ret; diff --git a/src/drivers/net/mlx_ipoib/ib_mt23108.c b/src/drivers/net/mlx_ipoib/ib_mt23108.c index ca3abb10..e09eabfe 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt23108.c +++ b/src/drivers/net/mlx_ipoib/ib_mt23108.c @@ -21,7 +21,7 @@ #include "mt23108.h" #include "ib_driver.h" -#include +#include "pci.h" struct device_buffers_st { union recv_wqe_u mads_qp_rcv_queue[NUM_MADS_RCV_WQES] @@ -799,6 +799,20 @@ static int setup_hca(__u8 port, void **eq_p) return ret; } + +static int unset_hca(void) +{ + int rc = 0; + + if (!fw_fatal) { + rc = cmd_sys_dis(); + if (rc) + eprintf(""); + } + + return rc; +} + static void *get_inprm_buf(void) { return dev_buffers_p->inprm_buf; diff --git a/src/drivers/net/mlx_ipoib/ib_mt25218.c b/src/drivers/net/mlx_ipoib/ib_mt25218.c index f16577f1..8b6b9945 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt25218.c +++ b/src/drivers/net/mlx_ipoib/ib_mt25218.c @@ -21,7 +21,7 @@ #include "mt25218.h" #include "ib_driver.h" -#include +#include "pci.h" #define MOD_INC(counter, max_count) (counter) = ((counter)+1) & ((max_count) - 1) @@ -89,6 +89,10 @@ static struct dev_pci_struct memfree_pci_dev; static struct device_buffers_st *dev_buffers_p; static struct device_ib_data_st dev_ib_data; + + +struct map_icm_st icm_map_obj; + static int gw_write_cr(__u32 addr, __u32 data) { writel(htonl(data), memfree_pci_dev.cr_space + addr); @@ -850,6 +854,8 @@ static int setup_hca(__u8 port, void **eq_p) eprintf(""); goto undo_map_fa; } + icm_map_obj = map_obj; + phys_mem.offset += (1 << (map_obj.vpm_arr[0].log2_size + 12)); init_hca.log_max_uars = log_max_uars; @@ -978,6 +984,30 @@ static int setup_hca(__u8 port, void **eq_p) return ret; } + +static int unset_hca(void) +{ + int rc, ret = 0; + + rc = cmd_unmap_icm(&icm_map_obj); + if (rc) + eprintf(""); + ret |= rc; + + + rc = cmd_unmap_icm_aux(); + if (rc) + eprintf(""); + ret |= rc; + + rc = cmd_unmap_fa(); + if (rc) + eprintf(""); + ret |= rc; + + return ret; +} + static void *get_inprm_buf(void) { return dev_buffers_p->inprm_buf; diff --git a/src/drivers/net/mlx_ipoib/ipoib.c b/src/drivers/net/mlx_ipoib/ipoib.c index 85eaac7a..33472de3 100644 --- a/src/drivers/net/mlx_ipoib/ipoib.c +++ b/src/drivers/net/mlx_ipoib/ipoib.c @@ -357,52 +357,12 @@ static void modify_dhcp_resp(void *buf, __u16 size) modify_udp_csum(buf, size); } -static void get_my_client_id(__u8 * my_client_id) -{ - - my_client_id[0] = 0; - qpn2buf(ipoib_data.ipoib_qpn, my_client_id + 1); - memcpy(my_client_id + 4, 
ipoib_data.port_gid_raw, 16); -} - -static const __u8 *get_client_id(const void *buf, int len) -{ - const __u8 *ptr; - int delta; - - if (len < 268) - return NULL; - - /* pointer to just after magic cookie */ - ptr = (const __u8 *)buf + 268; - - /* find last client identifier option */ - do { - if (ptr[0] == 255) { - /* found end of options list */ - return NULL; - } - - if (ptr[0] == 0x3d) { - /* client identifer option */ - return ptr + 3; - } - - delta = ptr[1] + 2; - ptr += delta; - len -= delta; - } while (len > 0); - - return NULL; -} - static int handle_ipv4_packet(void *buf, void **out_buf_p, unsigned int *new_size_p, int *is_bcast_p) { void *new_buf; __u16 new_size; __u8 msg_type; - __u8 my_client_id[20]; new_buf = (void *)(((__u8 *) buf) + 4); new_size = (*new_size_p) - 4; @@ -411,7 +371,6 @@ static int handle_ipv4_packet(void *buf, void **out_buf_p, if (get_ip_protocl(new_buf) == IP_PROT_UDP) { __u16 udp_dst_port; - const __u8 *client_id; udp_dst_port = get_udp_dst_port(new_buf); @@ -420,22 +379,6 @@ static int handle_ipv4_packet(void *buf, void **out_buf_p, *out_buf_p = 0; return 0; } - - if (udp_dst_port == 68) { - get_my_client_id(my_client_id); - - /* packet client id */ - client_id = get_client_id(new_buf, new_size); - if (!client_id) { - *out_buf_p = 0; - return 0; - } - - if (memcmp(client_id, my_client_id, 20)) { - *out_buf_p = 0; - return 0; - } - } } msg_type = get_dhcp_msg_type(new_buf); @@ -515,8 +458,9 @@ static int ipoib_handle_rcv(void *buf, void **out_buf_p, rc = handle_ipv4_packet(buf, out_buf_p, new_size_p, is_bcast_p); return rc; } - eprintf("prot=0x%x", prot_type); - return -1; + tprintf("prot=0x%x", prot_type); + *out_buf_p = NULL; + return 0; } static int is_null_mac(const __u8 * mac) diff --git a/src/drivers/net/mlx_ipoib/mt23108.c b/src/drivers/net/mlx_ipoib/mt23108.c index 492bc901..157995d7 100644 --- a/src/drivers/net/mlx_ipoib/mt23108.c +++ b/src/drivers/net/mlx_ipoib/mt23108.c @@ -15,9 +15,9 @@ Skeleton NIC driver for Etherboot /* to get the interface to the body of the program */ #include "nic.h" /* to get the PCI support functions, if this is a PCI NIC */ -#include +#include "pci.h" /* to get the ISA support functions, if this is an ISA NIC */ -#include +#include "isa.h" #include "mt_version.c" #include "mt23108_imp.c" @@ -235,7 +235,7 @@ static struct pci_id tavor_nics[] = { PCI_ROM(0x15b3, 0x6278, "MT25208", "MT25208 HCA driver"), }; -struct pci_driver tavor_driver __pci_driver = { +static struct pci_driver tavor_driver __pci_driver = { .type = NIC_DRIVER, .name = "MT23108/MT25208", .probe = tavor_probe, diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index a603cdeb..7866bf60 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -15,9 +15,9 @@ Skeleton NIC driver for Etherboot /* to get the interface to the body of the program */ #include "nic.h" /* to get the PCI support functions, if this is a PCI NIC */ -#include +#include "pci.h" /* to get the ISA support functions, if this is an ISA NIC */ -#include +#include "isa.h" #include "mt_version.c" #include "mt25218_imp.c" @@ -235,7 +235,7 @@ static struct pci_id mt25218_nics[] = { PCI_ROM(0x15b3, 0x6274, "MT25204", "MT25204 HCA driver"), }; -struct pci_driver mt25218_driver __pci_driver = { +static struct pci_driver mt25218_driver __pci_driver = { .type = NIC_DRIVER, .name = "MT25218", .probe = mt25218_probe, diff --git a/src/drivers/net/mlx_ipoib/mt25218.h b/src/drivers/net/mlx_ipoib/mt25218.h index 15a3feaf..1e7c8d8b 
100644 --- a/src/drivers/net/mlx_ipoib/mt25218.h +++ b/src/drivers/net/mlx_ipoib/mt25218.h @@ -26,7 +26,9 @@ #define MEMFREE_CMD_RUN_FW 0xff6 #define MEMFREE_CMD_SET_ICM_SIZE 0xffd #define MEMFREE_CMD_MAP_ICM_AUX 0xffc +#define MEMFREE_CMD_UNMAP_ICM_AUX 0xffb #define MEMFREE_CMD_MAP_ICM 0xffa +#define MEMFREE_CMD_UNMAP_ICM 0xff9 #define MEMFREE_CMD_QUERY_DEV_LIM 0x003 /* diff --git a/src/drivers/net/mlx_ipoib/mt_version.c b/src/drivers/net/mlx_ipoib/mt_version.c index 2dbd67a6..bae860c3 100644 --- a/src/drivers/net/mlx_ipoib/mt_version.c +++ b/src/drivers/net/mlx_ipoib/mt_version.c @@ -20,4 +20,4 @@ */ /* definition of the build version goes here */ -const char *build_revision= "113"; +const char *build_revision= "191"; diff --git a/src/drivers/net/mlx_ipoib/patches/dhcpd.patch b/src/drivers/net/mlx_ipoib/patches/dhcpd.patch index e2d0a202..3f6269bf 100644 --- a/src/drivers/net/mlx_ipoib/patches/dhcpd.patch +++ b/src/drivers/net/mlx_ipoib/patches/dhcpd.patch @@ -1,17 +1,7 @@ -diff -ru ../../orig/dhcp-3.0.4b2/common/options.c ./common/options.c ---- ../../orig/dhcp-3.0.4b2/common/options.c 2005-11-02 01:19:03.000000000 +0200 -+++ ./common/options.c 2005-12-06 14:38:17.000000000 +0200 -@@ -537,6 +537,7 @@ - priority_list [priority_len++] = DHO_DHCP_LEASE_TIME; - priority_list [priority_len++] = DHO_DHCP_MESSAGE; - priority_list [priority_len++] = DHO_DHCP_REQUESTED_ADDRESS; -+ priority_list [priority_len++] = DHO_DHCP_CLIENT_IDENTIFIER; - priority_list [priority_len++] = DHO_FQDN; - - if (prl && prl -> len > 0) { -diff -ru ../../orig/dhcp-3.0.4b2/includes/site.h ./includes/site.h ---- ../../orig/dhcp-3.0.4b2/includes/site.h 2002-03-12 20:33:39.000000000 +0200 -+++ ./includes/site.h 2005-12-06 14:36:55.000000000 +0200 +Index: dhcp-3.0.4b3/includes/site.h +=================================================================== +--- dhcp-3.0.4b3.orig/includes/site.h 2002-03-12 20:33:39.000000000 +0200 ++++ dhcp-3.0.4b3/includes/site.h 2006-03-15 12:50:00.000000000 +0200 @@ -135,7 +135,7 @@ the aforementioned problems do not matter to you, or if no other API is supported for your system, you may want to go with it. 
*/ From d0974ec7166c42be774d32df954a0e71283fa245 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Fri, 17 Aug 2007 19:51:08 +0100 Subject: [PATCH 02/84] Separate out arch-independent parts of stdint.h --- src/arch/i386/include/{ => bits}/stdint.h | 24 +++-------------------- src/include/stdint.h | 24 +++++++++++++++++++++++ 2 files changed, 27 insertions(+), 21 deletions(-) rename src/arch/i386/include/{ => bits}/stdint.h (50%) create mode 100644 src/include/stdint.h diff --git a/src/arch/i386/include/stdint.h b/src/arch/i386/include/bits/stdint.h similarity index 50% rename from src/arch/i386/include/stdint.h rename to src/arch/i386/include/bits/stdint.h index 34bea8b4..a2947cda 100644 --- a/src/arch/i386/include/stdint.h +++ b/src/arch/i386/include/bits/stdint.h @@ -1,5 +1,5 @@ -#ifndef STDINT_H -#define STDINT_H +#ifndef _BITS_STDINT_H +#define _BITS_STDINT_H typedef typeof(sizeof(int)) size_t; typedef signed long ssize_t; @@ -18,22 +18,4 @@ typedef signed long long int64_t; typedef unsigned long physaddr_t; typedef unsigned long intptr_t; -typedef int8_t s8; -typedef uint8_t u8; -typedef int16_t s16; -typedef uint16_t u16; -typedef int32_t s32; -typedef uint32_t u32; -typedef int64_t s64; -typedef uint64_t u64; - -typedef int8_t int8; -typedef uint8_t uint8; -typedef int16_t int16; -typedef uint16_t uint16; -typedef int32_t int32; -typedef uint32_t uint32; -typedef int64_t int64; -typedef uint64_t uint64; - -#endif /* STDINT_H */ +#endif /* _BITS_STDINT_H */ diff --git a/src/include/stdint.h b/src/include/stdint.h new file mode 100644 index 00000000..4b0e44f2 --- /dev/null +++ b/src/include/stdint.h @@ -0,0 +1,24 @@ +#ifndef _STDINT_H +#define _STDINT_H + +#include + +typedef int8_t s8; +typedef uint8_t u8; +typedef int16_t s16; +typedef uint16_t u16; +typedef int32_t s32; +typedef uint32_t u32; +typedef int64_t s64; +typedef uint64_t u64; + +typedef int8_t int8; +typedef uint8_t uint8; +typedef int16_t int16; +typedef uint16_t uint16; +typedef int32_t int32; +typedef uint32_t uint32; +typedef int64_t int64; +typedef uint64_t uint64; + +#endif /* _STDINT_H */ From 9d2c54735e377b45bc3a6504286077819667325a Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Fri, 17 Aug 2007 20:34:48 +0100 Subject: [PATCH 03/84] Started fixing up compilation warnings. 
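Most of the hunks below replace printf-style format specifiers that did not match their argument types: pointers are now printed with %p and unsigned long values with %lx/%ld, which is what gcc's -Wformat checking expects; the cmd_mgid_hash() change also reads the 32-bit result through a union instead of casting a __u16 array to __u32 *. The following is only an illustrative, stand-alone sketch of the format-specifier rule being applied (the variable names are made up, not taken from the driver):

#include <stdio.h>

int main(void) {
	void *wqe = (void *) 0x1000;        /* a pointer: print with %p      */
	unsigned long count = 0x2000UL;     /* an unsigned long: print %lx   */
	unsigned int idx = 7;               /* a plain unsigned int: use %x  */

	printf("wqe=%p count=0x%lx idx=0x%x\n", wqe, count, idx);
	return 0;
}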
--- src/drivers/net/mlx_ipoib/cmdif_comm.c | 9 ++++++--- src/drivers/net/mlx_ipoib/ib_mad.c | 2 +- src/drivers/net/mlx_ipoib/ib_mt23108.c | 22 +++++++++++----------- src/drivers/net/mlx_ipoib/ib_mt25218.c | 22 +++++++++++----------- src/drivers/net/mlx_ipoib/ipoib.c | 4 ++-- src/drivers/net/mlx_ipoib/mt23108.c | 4 ---- src/drivers/net/mlx_ipoib/mt23108_imp.c | 2 +- src/drivers/net/mlx_ipoib/mt25218.c | 4 ---- src/drivers/net/mlx_ipoib/mt25218_imp.c | 2 +- 9 files changed, 33 insertions(+), 38 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/cmdif_comm.c b/src/drivers/net/mlx_ipoib/cmdif_comm.c index d43a1068..cf7b9e55 100644 --- a/src/drivers/net/mlx_ipoib/cmdif_comm.c +++ b/src/drivers/net/mlx_ipoib/cmdif_comm.c @@ -542,7 +542,10 @@ static int cmd_mgid_hash(__u8 * gid, __u16 * mgid_hash_p) { int rc; command_fields_t cmd_desc; - __u16 result[2]; + union { + __u32 u32; + __u16 u16[2]; + } result; memset(&cmd_desc, 0, sizeof cmd_desc); @@ -554,9 +557,9 @@ static int cmd_mgid_hash(__u8 * gid, __u16 * mgid_hash_p) rc = cmd_invoke(&cmd_desc); if (!rc) { - rc = gw_read_cr(HCR_BASE + 16, (__u32 *) result); + rc = gw_read_cr(HCR_BASE + 16, &result.u32); if (!rc) { - *mgid_hash_p = result[0]; + *mgid_hash_p = result.u16[0]; } } diff --git a/src/drivers/net/mlx_ipoib/ib_mad.c b/src/drivers/net/mlx_ipoib/ib_mad.c index 3e263a5b..73b49f20 100644 --- a/src/drivers/net/mlx_ipoib/ib_mad.c +++ b/src/drivers/net/mlx_ipoib/ib_mad.c @@ -158,7 +158,7 @@ static int join_mc_group(__u32 * qkey_p, __u16 * mlid_p, __u8 join) eprintf(""); return -1; } - tprintf("allocated snd_wqe=0x%lx", snd_wqe); + tprintf("allocated snd_wqe=%p", snd_wqe); mad = get_send_wqe_buf(snd_wqe, 0); memset(mad, 0, 256); diff --git a/src/drivers/net/mlx_ipoib/ib_mt23108.c b/src/drivers/net/mlx_ipoib/ib_mt23108.c index e09eabfe..881f5933 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt23108.c +++ b/src/drivers/net/mlx_ipoib/ib_mt23108.c @@ -21,7 +21,7 @@ #include "mt23108.h" #include "ib_driver.h" -#include "pci.h" +#include struct device_buffers_st { union recv_wqe_u mads_qp_rcv_queue[NUM_MADS_RCV_WQES] @@ -203,7 +203,7 @@ static int ib_device_init(struct pci_device *dev) eprintf(""); return -1; } - tprintf("uar_base (pa:va) = 0x%lx 0x%lx", + tprintf("uar_base (pa:va) = 0x%lx %p", tavor_pci_dev.dev.bar[2] + UAR_IDX * 0x1000, tavor_pci_dev.uar); tprintf(""); @@ -225,7 +225,7 @@ static int init_dev_data(void) dev_buffers_p = bus_to_virt(tmp); memreg_size = (__u32) (&memreg_size) - (__u32) dev_buffers_p; - tprintf("src_buf=0x%lx, dev_buffers_p=0x%lx, memreg_size=0x%x", src_buf, + tprintf("src_buf=%p, dev_buffers_p=%p, memreg_size=0x%lx", src_buf, dev_buffers_p, memreg_size); return 0; @@ -551,9 +551,9 @@ static int setup_hca(__u8 port, void **eq_p) tprintf("fw_rev_major=%d", qfw.fw_rev_major); tprintf("fw_rev_minor=%d", qfw.fw_rev_minor); tprintf("fw_rev_subminor=%d", qfw.fw_rev_subminor); - tprintf("error_buf_start_h=0x%x", qfw.error_buf_start_h); - tprintf("error_buf_start_l=0x%x", qfw.error_buf_start_l); - tprintf("error_buf_size=%d", qfw.error_buf_size); + tprintf("error_buf_start_h=0x%lx", qfw.error_buf_start_h); + tprintf("error_buf_start_l=0x%lx", qfw.error_buf_start_l); + tprintf("error_buf_size=%ld", qfw.error_buf_size); } if (qfw.error_buf_start_h) { @@ -944,7 +944,7 @@ static int post_send_req(void *qph, void *wqeh, __u8 num_gather) __u32 *psrc, *pdst; __u32 nds; - tprintf("snd_wqe=0x%lx, virt_to_bus(snd_wqe)=0x%lx", snd_wqe, + tprintf("snd_wqe=%p, virt_to_bus(snd_wqe)=0x%lx", snd_wqe, virt_to_bus(snd_wqe)); memset(&dbell, 0, sizeof 
dbell); @@ -1068,7 +1068,7 @@ static int create_ipoib_qp(void **qp_pp, /* update data */ qp->rcv_wq[i].wqe_cont.qp = qp; qp->rcv_bufs[i] = ib_buffers.ipoib_rcv_buf[i]; - tprintf("rcv_buf=%lx", qp->rcv_bufs[i]); + tprintf("rcv_buf=%p", qp->rcv_bufs[i]); } /* init send queue WQEs list */ @@ -1401,7 +1401,7 @@ static int ib_poll_cq(void *cqh, struct ib_cqe_st *ib_cqe_p, u8 * num_cqes) eprintf("syndrome=0x%lx", EX_FLD(cqe.error_cqe, tavorprm_completion_with_error_st, syndrome)); - eprintf("wqe_addr=0x%lx", wqe_p); + eprintf("wqe_addr=%p", wqe_p); eprintf("wqe_size=0x%lx", EX_FLD(cqe.error_cqe, tavorprm_completion_with_error_st, wqe_size)); @@ -1530,7 +1530,7 @@ static struct recv_wqe_st *alloc_rcv_wqe(struct udqp_st *qp) wqe->mpointer[1].lkey = dev_ib_data.mkey; wqe->mpointer[1].byte_count = qp->rcv_buf_sz; - tprintf("rcv_buf=%lx\n", qp->rcv_bufs[new_entry]); + tprintf("rcv_buf=%p\n", qp->rcv_bufs[new_entry]); /* we do it only on the data segment since the control segment is always owned by HW */ @@ -1657,7 +1657,7 @@ static int poll_eq(struct ib_eqe_st *ib_eqe_p, __u8 * num_eqes) struct eq_st *eq = &dev_ib_data.eq; ptr = (__u32 *) (&(eq->eq_buf[eq->cons_idx])); - tprintf("cons)idx=%d, addr(eqe)=%x, val=0x%x", eq->cons_idx, virt_to_bus(ptr), ptr[7]); + tprintf("cons)idx=%ld, addr(eqe)=%lx, val=0x%lx", eq->cons_idx, virt_to_bus(ptr), ptr[7]); owner = (ptr[7] & 0x80000000) ? OWNER_HW : OWNER_SW; if (owner == OWNER_SW) { tprintf("got eqe"); diff --git a/src/drivers/net/mlx_ipoib/ib_mt25218.c b/src/drivers/net/mlx_ipoib/ib_mt25218.c index 8b6b9945..1e9ac637 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt25218.c +++ b/src/drivers/net/mlx_ipoib/ib_mt25218.c @@ -21,7 +21,7 @@ #include "mt25218.h" #include "ib_driver.h" -#include "pci.h" +#include #define MOD_INC(counter, max_count) (counter) = ((counter)+1) & ((max_count) - 1) @@ -159,7 +159,7 @@ static int ib_device_init(struct pci_device *dev) eprintf(""); return -1; } - tprintf("uar_base (pa:va) = 0x%lx 0x%lx", + tprintf("uar_base (pa:va) = 0x%lx %p", memfree_pci_dev.dev.bar[2] + UAR_IDX * 0x1000, memfree_pci_dev.uar); @@ -183,12 +183,12 @@ static int init_dev_data(void) dev_buffers_p = bus_to_virt(tmp); memreg_size = (__u32) (&memreg_size) - (__u32) dev_buffers_p; - tprintf("src_buf=0x%lx, dev_buffers_p=0x%lx, memreg_size=0x%x", src_buf, + tprintf("src_buf=%p, dev_buffers_p=%p, memreg_size=0x%lx", src_buf, dev_buffers_p, memreg_size); - tprintf("inprm: va=0x%lx, pa=0x%lx", dev_buffers_p->inprm_buf, + tprintf("inprm: va=%p, pa=0x%lx", dev_buffers_p->inprm_buf, virt_to_bus(dev_buffers_p->inprm_buf)); - tprintf("outprm: va=0x%lx, pa=0x%lx", dev_buffers_p->outprm_buf, + tprintf("outprm: va=%p, pa=0x%lx", dev_buffers_p->outprm_buf, virt_to_bus(dev_buffers_p->outprm_buf)); phys_mem.base = @@ -665,9 +665,9 @@ static int setup_hca(__u8 port, void **eq_p) tprintf("fw_rev_major=%d", qfw.fw_rev_major); tprintf("fw_rev_minor=%d", qfw.fw_rev_minor); tprintf("fw_rev_subminor=%d", qfw.fw_rev_subminor); - tprintf("error_buf_start_h=0x%x", qfw.error_buf_start_h); - tprintf("error_buf_start_l=0x%x", qfw.error_buf_start_l); - tprintf("error_buf_size=%d", qfw.error_buf_size); + tprintf("error_buf_start_h=0x%lx", qfw.error_buf_start_h); + tprintf("error_buf_start_l=0x%lx", qfw.error_buf_start_l); + tprintf("error_buf_size=%ld", qfw.error_buf_size); } @@ -840,7 +840,7 @@ static int setup_hca(__u8 port, void **eq_p) uar_context_pa = phys_mem.base + phys_mem.offset + dev_ib_data.uar_idx * 4096; uar_context_va = phys_to_virt(uar_context_pa); - tprintf("uar_context: 
va=0x%lx, pa=0x%lx", uar_context_va, + tprintf("uar_context: va=%p, pa=0x%lx", uar_context_va, uar_context_pa); dev_ib_data.uar_context_base = uar_context_va; @@ -859,7 +859,7 @@ static int setup_hca(__u8 port, void **eq_p) phys_mem.offset += (1 << (map_obj.vpm_arr[0].log2_size + 12)); init_hca.log_max_uars = log_max_uars; - tprintf("inprm: va=0x%lx, pa=0x%lx", inprm, virt_to_bus(inprm)); + tprintf("inprm: va=%p, pa=0x%lx", inprm, virt_to_bus(inprm)); prep_init_hca_buf(&init_hca, inprm); rc = cmd_init_hca(inprm, MT_STRUCT_SIZE(arbelprm_init_hca_st)); if (rc) { @@ -1720,7 +1720,7 @@ static int ib_poll_cq(void *cqh, struct ib_cqe_st *ib_cqe_p, u8 * num_cqes) eprintf("vendor_syndrome=0x%lx", EX_FLD(cqe.error_cqe, arbelprm_completion_with_error_st, vendor_code)); - eprintf("wqe_addr=0x%lx", wqe_p); + eprintf("wqe_addr=%p", wqe_p); eprintf("myqpn=0x%lx", EX_FLD(cqe.error_cqe, arbelprm_completion_with_error_st, myqpn)); diff --git a/src/drivers/net/mlx_ipoib/ipoib.c b/src/drivers/net/mlx_ipoib/ipoib.c index 33472de3..d4124f21 100644 --- a/src/drivers/net/mlx_ipoib/ipoib.c +++ b/src/drivers/net/mlx_ipoib/ipoib.c @@ -897,7 +897,7 @@ static int ipoib_read_packet(__u16 * prot_p, void *data, unsigned int *size_p, new_size = ib_cqe.count - GRH_SIZE; buf = get_rcv_wqe_buf(ib_cqe.wqe, 1); - tprintf("buf=%lx", buf); + tprintf("buf=%p", buf); rc = ipoib_handle_rcv(buf, &out_buf, &new_size, is_bcast_p); if (rc) { eprintf(""); @@ -944,7 +944,7 @@ static int ipoib_init(struct pci_device *pci) ipoib_data.ipoib_qpn = ib_get_qpn(qph); if(print_info) - printf("local ipoib qpn=0x%x\n", ipoib_data.ipoib_qpn); + printf("local ipoib qpn=0x%lx\n", ipoib_data.ipoib_qpn); ipoib_data.bcast_av = ib_data.bcast_av; ipoib_data.port_gid_raw = ib_data.port_gid.raw; diff --git a/src/drivers/net/mlx_ipoib/mt23108.c b/src/drivers/net/mlx_ipoib/mt23108.c index 157995d7..bd9a92f0 100644 --- a/src/drivers/net/mlx_ipoib/mt23108.c +++ b/src/drivers/net/mlx_ipoib/mt23108.c @@ -14,10 +14,6 @@ Skeleton NIC driver for Etherboot #include "etherboot.h" /* to get the interface to the body of the program */ #include "nic.h" -/* to get the PCI support functions, if this is a PCI NIC */ -#include "pci.h" -/* to get the ISA support functions, if this is an ISA NIC */ -#include "isa.h" #include "mt_version.c" #include "mt23108_imp.c" diff --git a/src/drivers/net/mlx_ipoib/mt23108_imp.c b/src/drivers/net/mlx_ipoib/mt23108_imp.c index 4e601668..b1e855b1 100644 --- a/src/drivers/net/mlx_ipoib/mt23108_imp.c +++ b/src/drivers/net/mlx_ipoib/mt23108_imp.c @@ -106,7 +106,7 @@ static void hd(void *where, int n) int i; while (n > 0) { - printf("%X ", where); + printf("%p ", where); for (i = 0; i < ((n > 16) ? 
16 : n); i++) printf(" %hhX", ((char *)where)[i]); printf("\n"); diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 7866bf60..25ae6c6b 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -14,10 +14,6 @@ Skeleton NIC driver for Etherboot #include "etherboot.h" /* to get the interface to the body of the program */ #include "nic.h" -/* to get the PCI support functions, if this is a PCI NIC */ -#include "pci.h" -/* to get the ISA support functions, if this is an ISA NIC */ -#include "isa.h" #include "mt_version.c" #include "mt25218_imp.c" diff --git a/src/drivers/net/mlx_ipoib/mt25218_imp.c b/src/drivers/net/mlx_ipoib/mt25218_imp.c index fe407041..9ab57f67 100644 --- a/src/drivers/net/mlx_ipoib/mt25218_imp.c +++ b/src/drivers/net/mlx_ipoib/mt25218_imp.c @@ -106,7 +106,7 @@ static void hd(void *where, int n) int i; while (n > 0) { - printf("%X ", where); + printf("%p ", where); for (i = 0; i < ((n > 16) ? 16 : n); i++) printf(" %hhX", ((char *)where)[i]); printf("\n"); From b42c5905cb59f6ca4893afa4935eb0365079cae9 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 18 Aug 2007 18:04:18 +0100 Subject: [PATCH 04/84] Very quick and very dirty hack to get the Mellanox code building inside gPXE. --- src/Makefile | 5 ++++ src/drivers/net/mlx_ipoib/ib_mt23108.c | 30 ++++++++++++++++------ src/drivers/net/mlx_ipoib/ib_mt25218.c | 12 ++++++--- src/drivers/net/mlx_ipoib/mt23108.c | 33 ++++++++++++------------- src/drivers/net/mlx_ipoib/mt23108_imp.c | 4 +++ src/drivers/net/mlx_ipoib/mt25218.c | 33 ++++++++++++------------- src/drivers/net/mlx_ipoib/mt25218_imp.c | 4 +++ 7 files changed, 76 insertions(+), 45 deletions(-) diff --git a/src/Makefile b/src/Makefile index 0f8ddca2..f4a0796b 100644 --- a/src/Makefile +++ b/src/Makefile @@ -164,6 +164,11 @@ SRCDIRS += usr NON_AUTO_SRCS += core/elf_loader.c NON_AUTO_SRCS += drivers/net/prism2.c +SRCS += drivers/net/mlx_ipoib/mt25218.c +SRCS += drivers/net/mlx_ipoib/mt23108.c +CFLAGS_mt25218 = -Wno-error +CFLAGS_mt23108 = -Wno-error + # Rules for finalising files. TGT_MAKEROM_FLAGS is defined as part of # the automatic build system and varies by target; it includes the # "-p 0x1234,0x5678" string to set the PCI IDs. 
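The hunks that follow adapt the Etherboot 5.4 code to gPXE's PCI configuration-space accessors, which take a struct pci_device * rather than separate bus/devfn arguments; where the old code scanned the bus by hand, the patch now fills in a temporary struct pci_device with just .bus and .devfn. A rough sketch of that calling convention, mirroring the hunks below; the accessor name, the .bus/.devfn fields and PCI_VENDOR_ID are taken from the diff, while the header path and exact types are assumptions:

#include <stdint.h>
#include <gpxe/pci.h>

static int read_vendor ( unsigned int bus, unsigned int devfn,
			 uint16_t *vendor ) {
	struct pci_device tmp;

	/* Only bus/devfn are needed for a config-space read */
	tmp.bus = bus;
	tmp.devfn = devfn;

	return pcibios_read_config_word ( &tmp, PCI_VENDOR_ID, vendor );
}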
diff --git a/src/drivers/net/mlx_ipoib/ib_mt23108.c b/src/drivers/net/mlx_ipoib/ib_mt23108.c index 881f5933..d9261b9c 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt23108.c +++ b/src/drivers/net/mlx_ipoib/ib_mt23108.c @@ -92,7 +92,12 @@ static int find_mlx_bridge(__u8 hca_bus, __u8 * br_bus_p, __u8 * br_devfn_p) for (bus = 0; bus < 256; ++bus) { for (dev = 0; dev < 32; ++dev) { devfn = (dev << 3); - rc = pcibios_read_config_word(bus, devfn, PCI_VENDOR_ID, + + struct pci_device tmp; + tmp.bus = bus; + tmp.devfn = devfn; + + rc = pcibios_read_config_word(&tmp, PCI_VENDOR_ID, &vendor); if (rc) return rc; @@ -100,7 +105,7 @@ static int find_mlx_bridge(__u8 hca_bus, __u8 * br_bus_p, __u8 * br_devfn_p) if (vendor != MELLANOX_VENDOR_ID) continue; - rc = pcibios_read_config_word(bus, devfn, PCI_DEVICE_ID, + rc = pcibios_read_config_word(&tmp, PCI_DEVICE_ID, &dev_id); if (rc) return rc; @@ -108,7 +113,7 @@ static int find_mlx_bridge(__u8 hca_bus, __u8 * br_bus_p, __u8 * br_devfn_p) if (dev_id != TAVOR_BRIDGE_DEVICE_ID) continue; - rc = pcibios_read_config_byte(bus, devfn, + rc = pcibios_read_config_byte(&tmp, PCI_SECONDARY_BUS, &sec_bus); if (rc) @@ -161,7 +166,7 @@ static int ib_device_init(struct pci_device *dev) tavor_pci_dev.dev.dev = dev; tprintf(""); - if (dev->dev_id == TAVOR_DEVICE_ID) { + if (dev->device == TAVOR_DEVICE_ID) { rc = find_mlx_bridge(dev->bus, &br_bus, &br_devfn); if (rc) { @@ -175,7 +180,12 @@ static int ib_device_init(struct pci_device *dev) tprintf("bus=%d devfn=0x%x", br_bus, br_devfn); /* save config space */ for (i = 0; i < 64; ++i) { - rc = pcibios_read_config_dword(br_bus, br_devfn, i << 2, + + struct pci_device tmp; + tmp.bus = br_bus; + tmp.devfn = br_devfn; + + rc = pcibios_read_config_dword(&tmp, i << 2, &tavor_pci_dev.br. dev_config_space[i]); if (rc) { @@ -236,10 +246,14 @@ static int restore_config(void) int i; int rc; - if (tavor_pci_dev.dev.dev->dev_id == TAVOR_DEVICE_ID) { + if (tavor_pci_dev.dev.dev->device == TAVOR_DEVICE_ID) { for (i = 0; i < 64; ++i) { - rc = pcibios_write_config_dword(tavor_pci_dev.br.bus, - tavor_pci_dev.br.devfn, + + struct pci_device tmp; + tmp.bus = tavor_pci_dev.br.bus; + tmp.devfn = tavor_pci_dev.br.devfn; + + rc = pcibios_write_config_dword(&tmp, i << 2, tavor_pci_dev.br. dev_config_space[i]); diff --git a/src/drivers/net/mlx_ipoib/ib_mt25218.c b/src/drivers/net/mlx_ipoib/ib_mt25218.c index 1e9ac637..8f3873e6 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt25218.c +++ b/src/drivers/net/mlx_ipoib/ib_mt25218.c @@ -174,6 +174,8 @@ static inline unsigned long lalign(unsigned long buf, unsigned long align) (~(((unsigned long)align) - 1))); } +#include + static int init_dev_data(void) { unsigned long tmp; @@ -191,9 +193,13 @@ static int init_dev_data(void) tprintf("outprm: va=%p, pa=0x%lx", dev_buffers_p->outprm_buf, virt_to_bus(dev_buffers_p->outprm_buf)); - phys_mem.base = - (virt_to_phys(_text) - reserve_size) & (~(reserve_size - 1)); - + userptr_t lotsofmem = umalloc ( reserve_size * 2 ); + if ( ! 
lotsofmem ) { + printf ( "Could not allocate large memblock\n" ); + return -1; + } + phys_mem.base = ( ( user_to_phys ( lotsofmem, 0 ) + reserve_size ) & + ~( reserve_size - 1 ) ); phys_mem.offset = 0; return 0; diff --git a/src/drivers/net/mlx_ipoib/mt23108.c b/src/drivers/net/mlx_ipoib/mt23108.c index bd9a92f0..37947a8c 100644 --- a/src/drivers/net/mlx_ipoib/mt23108.c +++ b/src/drivers/net/mlx_ipoib/mt23108.c @@ -148,7 +148,7 @@ static void tavor_transmit(struct nic *nic, const char *dest, /* Destination */ /************************************************************************** DISABLE - Turn off ethernet interface ***************************************************************************/ -static void tavor_disable(struct dev *dev) +static void tavor_disable(struct nic *nic) { /* put the card in its initial state */ /* This function serves 3 purposes. @@ -160,18 +160,24 @@ static void tavor_disable(struct dev *dev) * This allows etherboot to reinitialize the interface * if something is something goes wrong. */ - if (dev || 1) { // ???? + if (nic || 1) { // ???? disable_imp(); } } +static struct nic_operations tavor_operations = { + .connect = dummy_connect, + .poll = tavor_poll, + .transmit = tavor_transmit, + .irq = tavor_irq, +}; + /************************************************************************** PROBE - Look for an adapter, this routine's visible to the outside ***************************************************************************/ -static int tavor_probe(struct dev *dev, struct pci_device *pci) +static int tavor_probe(struct nic *nic, struct pci_device *pci) { - struct nic *nic = (struct nic *)dev; int rc; unsigned char user_request; @@ -215,10 +221,7 @@ static int tavor_probe(struct dev *dev, struct pci_device *pci) nic->ioaddr = pci->ioaddr & ~3; nic->irqno = pci->irq; /* point to NIC specific routines */ - dev->disable = tavor_disable; - nic->poll = tavor_poll; - nic->transmit = tavor_transmit; - nic->irq = tavor_irq; + nic->nic_op = &tavor_operations; return 1; } @@ -226,16 +229,12 @@ static int tavor_probe(struct dev *dev, struct pci_device *pci) return 0; } -static struct pci_id tavor_nics[] = { +static struct pci_device_id tavor_nics[] = { PCI_ROM(0x15b3, 0x5a44, "MT23108", "MT23108 HCA driver"), PCI_ROM(0x15b3, 0x6278, "MT25208", "MT25208 HCA driver"), }; -static struct pci_driver tavor_driver __pci_driver = { - .type = NIC_DRIVER, - .name = "MT23108/MT25208", - .probe = tavor_probe, - .ids = tavor_nics, - .id_count = sizeof(tavor_nics) / sizeof(tavor_nics[0]), - .class = 0, -}; +PCI_DRIVER ( tavor_driver, tavor_nics, PCI_NO_CLASS ); + +DRIVER ( "MT23108/MT25208", nic_driver, pci_driver, tavor_driver, + tavor_probe, tavor_disable ); diff --git a/src/drivers/net/mlx_ipoib/mt23108_imp.c b/src/drivers/net/mlx_ipoib/mt23108_imp.c index b1e855b1..d2bdf46b 100644 --- a/src/drivers/net/mlx_ipoib/mt23108_imp.c +++ b/src/drivers/net/mlx_ipoib/mt23108_imp.c @@ -91,10 +91,12 @@ static int transmit_imp(const char *dest, /* Destination */ rc = ipoib_send_packet(dest, type, packet, size); if (rc) { printf("*** ERROR IN SEND FLOW ***\n"); +#if 0 printf("restarting Etherboot\n"); sleep(1); longjmp(restart_etherboot, -1); /* we should not be here ... */ +#endif return -1; } @@ -222,9 +224,11 @@ static int poll_imp(struct nic *nic, int retrieve, unsigned int *size_p) fatal_handling: printf("restarting Etherboot\n"); +#if 0 sleep(1); longjmp(restart_etherboot, -1); /* we should not be here ... 
*/ +#endif return -1; } diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 25ae6c6b..3273ebf6 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -148,7 +148,7 @@ static void mt25218_transmit(struct nic *nic, const char *dest, /* Destination * /************************************************************************** DISABLE - Turn off ethernet interface ***************************************************************************/ -static void mt25218_disable(struct dev *dev) +static void mt25218_disable(struct nic *nic) { /* put the card in its initial state */ /* This function serves 3 purposes. @@ -160,18 +160,24 @@ static void mt25218_disable(struct dev *dev) * This allows etherboot to reinitialize the interface * if something is something goes wrong. */ - if (dev || 1) { // ???? + if (nic || 1) { // ???? disable_imp(); } } +static struct nic_operations mt25218_operations = { + .connect = dummy_connect, + .poll = mt25218_poll, + .transmit = mt25218_transmit, + .irq = mt25218_irq, +}; + /************************************************************************** PROBE - Look for an adapter, this routine's visible to the outside ***************************************************************************/ -static int mt25218_probe(struct dev *dev, struct pci_device *pci) +static int mt25218_probe(struct nic *nic, struct pci_device *pci) { - struct nic *nic = (struct nic *)dev; int rc; unsigned char user_request; @@ -215,10 +221,7 @@ static int mt25218_probe(struct dev *dev, struct pci_device *pci) nic->ioaddr = pci->ioaddr & ~3; nic->irqno = pci->irq; /* point to NIC specific routines */ - dev->disable = mt25218_disable; - nic->poll = mt25218_poll; - nic->transmit = mt25218_transmit; - nic->irq = mt25218_irq; + nic->nic_op = &mt25218_operations; return 1; } @@ -226,16 +229,12 @@ static int mt25218_probe(struct dev *dev, struct pci_device *pci) return 0; } -static struct pci_id mt25218_nics[] = { +static struct pci_device_id mt25218_nics[] = { PCI_ROM(0x15b3, 0x6282, "MT25218", "MT25218 HCA driver"), PCI_ROM(0x15b3, 0x6274, "MT25204", "MT25204 HCA driver"), }; -static struct pci_driver mt25218_driver __pci_driver = { - .type = NIC_DRIVER, - .name = "MT25218", - .probe = mt25218_probe, - .ids = mt25218_nics, - .id_count = sizeof(mt25218_nics) / sizeof(mt25218_nics[0]), - .class = 0, -}; +PCI_DRIVER ( mt25218_driver, mt25218_nics, PCI_NO_CLASS ); + +DRIVER ( "MT25218", nic_driver, pci_driver, mt25218_driver, + mt25218_probe, mt25218_disable ); diff --git a/src/drivers/net/mlx_ipoib/mt25218_imp.c b/src/drivers/net/mlx_ipoib/mt25218_imp.c index 9ab57f67..301f31df 100644 --- a/src/drivers/net/mlx_ipoib/mt25218_imp.c +++ b/src/drivers/net/mlx_ipoib/mt25218_imp.c @@ -91,10 +91,12 @@ static int transmit_imp(const char *dest, /* Destination */ rc = ipoib_send_packet(dest, type, packet, size); if (rc) { printf("*** ERROR IN SEND FLOW ***\n"); +#if 0 printf("restarting Etherboot\n"); sleep(1); longjmp(restart_etherboot, -1); /* we should not be here ... */ +#endif return -1; } @@ -221,10 +223,12 @@ static int poll_imp(struct nic *nic, int retrieve, unsigned int *size_p) return 0; fatal_handling: +#if 0 printf("restarting Etherboot\n"); sleep(1); longjmp(restart_etherboot, -1); /* we should not be here ... 
*/ +#endif return -1; } From 800c8b014b53b220144e77e43080128c3e00ef16 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 18 Aug 2007 18:04:50 +0100 Subject: [PATCH 05/84] Add barrier() primitive (was present in Eb5.4), used by some currently out-of-tree driver code. --- src/include/compiler.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/include/compiler.h b/src/include/compiler.h index b130f28f..2151fc6a 100644 --- a/src/include/compiler.h +++ b/src/include/compiler.h @@ -311,6 +311,11 @@ extern void dbg_hex_dump_da ( unsigned long dispaddr, */ #define __shared __asm__ ( "_shared_bss" ) +/** + * Optimisation barrier + */ +#define barrier() __asm__ __volatile__ ( "" : : : "memory" ) + #endif /* ASSEMBLY */ #endif /* COMPILER_H */ From a45a145b8c4da48de526ecd381ba24fe64ead848 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 19 Aug 2007 00:29:27 +0100 Subject: [PATCH 06/84] Added error IDs for all files in drivers/net; they're likely to need them. --- src/include/gpxe/errfile.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/include/gpxe/errfile.h b/src/include/gpxe/errfile.h index 48db1dc1..4f9e7bc6 100644 --- a/src/include/gpxe/errfile.h +++ b/src/include/gpxe/errfile.h @@ -75,6 +75,32 @@ #define ERRFILE_rtl8139 ( ERRFILE_DRIVER | 0x002a0000 ) #define ERRFILE_smc9000 ( ERRFILE_DRIVER | 0x002b0000 ) #define ERRFILE_tg3 ( ERRFILE_DRIVER | 0x002c0000 ) +#define ERRFILE_3c509_eisa ( ERRFILE_DRIVER | 0x002d0000 ) +#define ERRFILE_3c515 ( ERRFILE_DRIVER | 0x002e0000 ) +#define ERRFILE_3c529 ( ERRFILE_DRIVER | 0x002f0000 ) +#define ERRFILE_3c595 ( ERRFILE_DRIVER | 0x00300000 ) +#define ERRFILE_3c5x9 ( ERRFILE_DRIVER | 0x00310000 ) +#define ERRFILE_3c90x ( ERRFILE_DRIVER | 0x00320000 ) +#define ERRFILE_amd8111e ( ERRFILE_DRIVER | 0x00330000 ) +#define ERRFILE_davicom ( ERRFILE_DRIVER | 0x00340000 ) +#define ERRFILE_depca ( ERRFILE_DRIVER | 0x00350000 ) +#define ERRFILE_dmfe ( ERRFILE_DRIVER | 0x00360000 ) +#define ERRFILE_e1000 ( ERRFILE_DRIVER | 0x00370000 ) +#define ERRFILE_eepro100 ( ERRFILE_DRIVER | 0x00380000 ) +#define ERRFILE_epic100 ( ERRFILE_DRIVER | 0x00390000 ) +#define ERRFILE_forcedeth ( ERRFILE_DRIVER | 0x003a0000 ) +#define ERRFILE_mtd80x ( ERRFILE_DRIVER | 0x003b0000 ) +#define ERRFILE_ns83820 ( ERRFILE_DRIVER | 0x003c0000 ) +#define ERRFILE_ns8390 ( ERRFILE_DRIVER | 0x003d0000 ) +#define ERRFILE_pcnet32 ( ERRFILE_DRIVER | 0x003e0000 ) +#define ERRFILE_r8169 ( ERRFILE_DRIVER | 0x003f0000 ) +#define ERRFILE_sis900 ( ERRFILE_DRIVER | 0x00400000 ) +#define ERRFILE_sundance ( ERRFILE_DRIVER | 0x00410000 ) +#define ERRFILE_tlan ( ERRFILE_DRIVER | 0x00420000 ) +#define ERRFILE_tulip ( ERRFILE_DRIVER | 0x00430000 ) +#define ERRFILE_via_rhine ( ERRFILE_DRIVER | 0x00440000 ) +#define ERRFILE_via_velocity ( ERRFILE_DRIVER | 0x00450000 ) +#define ERRFILE_w89c840 ( ERRFILE_DRIVER | 0x00460000 ) #define ERRFILE_scsi ( ERRFILE_DRIVER | 0x00700000 ) From 74a49af4abf826004c0172abe46d17e7f43d5ccc Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Wed, 29 Aug 2007 20:07:41 +0100 Subject: [PATCH 07/84] Force MAC address for testing purposes --- src/drivers/net/mlx_ipoib/mt25218.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 3273ebf6..11a35c2e 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -223,6 +223,10 @@ static int mt25218_probe(struct nic *nic, struct pci_device *pci) /* point to NIC specific routines */ 
nic->nic_op = &mt25218_operations; + uint8_t fixed_node_addr[ETH_ALEN] = { 0x00, 0x02, 0xc9, + 0x20, 0xf5, 0x95 }; + memcpy ( nic->node_addr, fixed_node_addr, ETH_ALEN ); + return 1; } /* else */ From 7b6d11e7136cee21cc9a76614174abac999f6173 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Wed, 12 Sep 2007 22:17:43 +0100 Subject: [PATCH 08/84] Started IB driver rewrite --- src/drivers/net/mlx_ipoib/ib_mt25218.c | 7 + src/drivers/net/mlx_ipoib/mt25218.c | 206 ++++++++++++++++++++++++- src/include/gpxe/errfile.h | 1 + src/include/gpxe/infiniband.h | 52 +++++++ src/include/gpxe/netdevice.h | 4 +- src/net/infiniband.c | 118 ++++++++++++++ 6 files changed, 381 insertions(+), 7 deletions(-) create mode 100644 src/include/gpxe/infiniband.h create mode 100644 src/net/infiniband.c diff --git a/src/drivers/net/mlx_ipoib/ib_mt25218.c b/src/drivers/net/mlx_ipoib/ib_mt25218.c index 8f3873e6..631a95cb 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt25218.c +++ b/src/drivers/net/mlx_ipoib/ib_mt25218.c @@ -1586,6 +1586,13 @@ static void prep_send_wqe_buf(void *qph, len += offset; } snd_wqe->mpointer[0].byte_count = cpu_to_be32(len); + + DBG ( "prep_send_wqe_buf()\n" ); + DBG ( "snd_wqe:\n" ); + DBG_HD ( snd_wqe, sizeof ( *snd_wqe ) ); + DBG ( "packet:\n" ); + DBG_HD ( bus_to_virt(be32_to_cpu(snd_wqe->mpointer[0].local_addr_l)), + len ); } static void *alloc_ud_av(void) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 11a35c2e..b9b12c36 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -10,6 +10,15 @@ Skeleton NIC driver for Etherboot * your option) any later version. */ +#include +#include +#include +#include +#include + +struct mlx_nic { +}; + /* to get some global routines like printf */ #include "etherboot.h" /* to get the interface to the body of the program */ @@ -145,6 +154,131 @@ static void mt25218_transmit(struct nic *nic, const char *dest, /* Destination * } } +/** + * Open network device + * + * @v netdev Network device + * @ret rc Return status code + */ +static int mlx_open ( struct net_device *netdev ) { + return 0; +} + +/** + * Close network device + * + * @v netdev Network device + */ +static void mlx_close ( struct net_device *netdev ) { +} + +#warning "Broadcast address?" 
+static uint8_t ib_broadcast[IB_ALEN] = { 0xff, }; + + +/** + * Transmit packet + * + * @v netdev Network device + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static int mlx_transmit ( struct net_device *netdev, + struct io_buffer *iobuf ) { + struct ibhdr *ibhdr = iobuf->data; + + DBG ( "Sending packet:\n" ); + // DBG_HD ( iobuf->data, iob_len ( iobuf ) ); + + DBG ( "Peer:\n" ); + DBG_HD ( &ibhdr->peer[0], IB_ALEN ); + DBG ( "Bcast:\n" ); + DBG_HD ( &ib_broadcast[0], IB_ALEN ); + + iob_pull ( iobuf, sizeof ( *ibhdr ) ); + + if ( memcmp ( ibhdr->peer, ib_broadcast, IB_ALEN ) == 0 ) { + printf ( "Sending broadcast packet\n" ); + return send_bcast_packet ( ibhdr->proto, iobuf->data, + iob_len ( iobuf ) ); + } else { + printf ( "Sending unicast packet\n" ); + return send_ucast_packet ( ibhdr->peer, ibhdr->proto, + iobuf->data, iob_len ( iobuf ) ); + } +} + +/** + * Poll for completed and received packets + * + * @v netdev Network device + */ +static void mlx_poll ( struct net_device *netdev ) { + struct ib_cqe_st ib_cqe; + uint8_t num_cqes; + unsigned int len; + struct io_buffer *iobuf; + void *buf; + int rc; + + if ( ( rc = poll_error_buf() ) != 0 ) { + DBG ( "poll_error_buf() failed: %s\n", strerror ( rc ) ); + return; + } + + if ( ( rc = drain_eq() ) != 0 ) { + DBG ( "drain_eq() failed: %s\n", strerror ( rc ) ); + return; + } + + if ( ( rc = ib_poll_cq ( ipoib_data.rcv_cqh, &ib_cqe, + &num_cqes ) ) != 0 ) { + DBG ( "ib_poll_cq() failed: %s\n", strerror ( rc ) ); + return; + } + + if ( ! num_cqes ) + return; + + if ( ib_cqe.is_error ) { + DBG ( "cqe error\n" ); + free_wqe ( ib_cqe.wqe ); + return; + } + + len = ib_cqe.count; + iobuf = alloc_iob ( len ); + if ( ! iobuf ) { + DBG ( "out of memory\n" ); + free_wqe ( ib_cqe.wqe ); + return; + } + memcpy ( iob_put ( iobuf, len ), buf, len ); + DBG ( "Received packet:\n" ); + DBG_HD ( iobuf->data, iob_len ( iobuf ) ); + + netdev_rx ( netdev, iobuf ); + + free_wqe ( ib_cqe.wqe ); +} + +/** + * Enable or disable interrupts + * + * @v netdev Network device + * @v enable Interrupts should be enabled + */ +static void mlx_irq ( struct net_device *netdev, int enable ) { +} + +static struct net_device_operations mlx_operations = { + .open = mlx_open, + .close = mlx_close, + .transmit = mlx_transmit, + .poll = mlx_poll, + .irq = mlx_irq, +}; + /************************************************************************** DISABLE - Turn off ethernet interface ***************************************************************************/ @@ -165,6 +299,21 @@ static void mt25218_disable(struct nic *nic) } } +/** + * Remove PCI device + * + * @v pci PCI device + */ +static void mlx_remove ( struct pci_device *pci ) { + struct net_device *netdev = pci_get_drvdata ( pci ); + struct mlx_nic *mlx = netdev->priv; + + unregister_netdev ( netdev ); + ipoib_close(0); + netdev_nullify ( netdev ); + netdev_put ( netdev ); +} + static struct nic_operations mt25218_operations = { .connect = dummy_connect, .poll = mt25218_poll, @@ -233,12 +382,59 @@ static int mt25218_probe(struct nic *nic, struct pci_device *pci) return 0; } -static struct pci_device_id mt25218_nics[] = { +/** + * Probe PCI device + * + * @v pci PCI device + * @v id PCI ID + * @ret rc Return status code + */ +static int mlx_probe ( struct pci_device *pci, + const struct pci_device_id *id __unused ) { + struct net_device *netdev; + struct mlx_nic *mlx; + int rc; + + /* Allocate net device */ + netdev = alloc_ibdev ( sizeof ( *mlx ) ); + if ( ! 
netdev ) + return -ENOMEM; + netdev_init ( netdev, &mlx_operations ); + mlx = netdev->priv; + pci_set_drvdata ( pci, netdev ); + netdev->dev = &pci->dev; + memset ( mlx, 0, sizeof ( *mlx ) ); + + /* Fix up PCI device */ + adjust_pci_device ( pci ); + + /* Initialise hardware */ + if ( ( rc = ipoib_init ( pci ) ) != 0 ) + goto err_ipoib_init; + memcpy ( netdev->ll_addr, ipoib_data.port_gid_raw, IB_ALEN ); + + /* Register network device */ + if ( ( rc = register_netdev ( netdev ) ) != 0 ) + goto err_register_netdev; + + return 0; + + err_register_netdev: + err_ipoib_init: + ipoib_close(0); + netdev_nullify ( netdev ); + netdev_put ( netdev ); + return rc; +} + +static struct pci_device_id mlx_nics[] = { PCI_ROM(0x15b3, 0x6282, "MT25218", "MT25218 HCA driver"), PCI_ROM(0x15b3, 0x6274, "MT25204", "MT25204 HCA driver"), }; -PCI_DRIVER ( mt25218_driver, mt25218_nics, PCI_NO_CLASS ); - -DRIVER ( "MT25218", nic_driver, pci_driver, mt25218_driver, - mt25218_probe, mt25218_disable ); +struct pci_driver mlx_driver __pci_driver = { + .ids = mlx_nics, + .id_count = ( sizeof ( mlx_nics ) / sizeof ( mlx_nics[0] ) ), + .probe = mlx_probe, + .remove = mlx_remove, +}; diff --git a/src/include/gpxe/errfile.h b/src/include/gpxe/errfile.h index 4f9e7bc6..3413f9cf 100644 --- a/src/include/gpxe/errfile.h +++ b/src/include/gpxe/errfile.h @@ -123,6 +123,7 @@ #define ERRFILE_dhcp ( ERRFILE_NET | 0x00100000 ) #define ERRFILE_dns ( ERRFILE_NET | 0x00110000 ) #define ERRFILE_tftp ( ERRFILE_NET | 0x00120000 ) +#define ERRFILE_infiniband ( ERRFILE_NET | 0x00130000 ) #define ERRFILE_image ( ERRFILE_IMAGE | 0x00000000 ) #define ERRFILE_elf ( ERRFILE_IMAGE | 0x00010000 ) diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h new file mode 100644 index 00000000..126113a7 --- /dev/null +++ b/src/include/gpxe/infiniband.h @@ -0,0 +1,52 @@ +#ifndef _GPXE_INFINIBAND_H +#define _GPXE_INFINIBAND_H + +/** @file + * + * Infiniband protocol + * + */ + +#include +#include + +/** Infiniband hardware address length */ +#define IB_ALEN 20 +#define IB_HLEN 24 + +/** An Infiniband header + * + * This data structure doesn't represent the on-wire format, but does + * contain all the information required by the driver to construct the + * packet. 
+ */ +struct ibhdr { + /** Peer address */ + uint8_t peer[IB_ALEN]; + /** Network-layer protocol */ + uint16_t proto; + /** Reserved, must be zero */ + uint16_t reserved; +} __attribute__ (( packed )); + +extern struct ll_protocol infiniband_protocol; + +extern const char * ib_ntoa ( const void *ll_addr ); + +/** + * Allocate Infiniband device + * + * @v priv_size Size of driver private data + * @ret netdev Network device, or NULL + */ +static inline struct net_device * alloc_ibdev ( size_t priv_size ) { + struct net_device *netdev; + + netdev = alloc_netdev ( priv_size ); + if ( netdev ) { + netdev->ll_protocol = &infiniband_protocol; + } + return netdev; +} + +#endif /* _GPXE_INFINIBAND_H */ diff --git a/src/include/gpxe/netdevice.h b/src/include/gpxe/netdevice.h index d82c6d8f..2cbd0efb 100644 --- a/src/include/gpxe/netdevice.h +++ b/src/include/gpxe/netdevice.h @@ -19,10 +19,10 @@ struct ll_protocol; struct device; /** Maximum length of a link-layer address */ -#define MAX_LL_ADDR_LEN 6 +#define MAX_LL_ADDR_LEN 20 /** Maximum length of a link-layer header */ -#define MAX_LL_HEADER_LEN 16 +#define MAX_LL_HEADER_LEN 32 /** Maximum length of a network-layer address */ #define MAX_NET_ADDR_LEN 4 diff --git a/src/net/infiniband.c b/src/net/infiniband.c new file mode 100644 index 00000000..bcfac292 --- /dev/null +++ b/src/net/infiniband.c @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2007 Michael Brown . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** @file + * + * Infiniband protocol + * + */ + +/** Infiniband broadcast MAC address */ +static uint8_t ib_broadcast[IB_ALEN] = { 0xff, }; + +/** + * Transmit Infiniband packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v net_protocol Network-layer protocol + * @v ll_dest Link-layer destination address + * + * Prepends the Infiniband link-layer header and transmits the packet. + */ +static int ib_tx ( struct io_buffer *iobuf, struct net_device *netdev, + struct net_protocol *net_protocol, const void *ll_dest ) { + struct ibhdr *ibhdr = iob_push ( iobuf, sizeof ( *ibhdr ) ); + + + /* Build Infiniband header */ + memcpy ( ibhdr->peer, ll_dest, IB_ALEN ); + ibhdr->proto = net_protocol->net_proto; + ibhdr->reserved = 0; + + /* Hand off to network device */ + return netdev_tx ( netdev, iobuf ); +} + +/** + * Process received Infiniband packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * + * Strips off the Infiniband link-layer header and passes up to the + * network-layer protocol. 
+ */ +static int ib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) { + struct ibhdr *ibhdr = iobuf->data; + + /* Sanity check */ + if ( iob_len ( iobuf ) < sizeof ( *ibhdr ) ) { + DBG ( "Infiniband packet too short (%d bytes)\n", + iob_len ( iobuf ) ); + free_iob ( iobuf ); + return -EINVAL; + } + + /* Strip off Infiniband header */ + iob_pull ( iobuf, sizeof ( *ibhdr ) ); + + /* Hand off to network-layer protocol */ + return net_rx ( iobuf, netdev, ibhdr->proto, ibhdr->peer ); +} + +/** + * Transcribe Infiniband address + * + * @v ll_addr Link-layer address + * @ret string Link-layer address in human-readable format + */ +const char * ib_ntoa ( const void *ll_addr ) { + static char buf[61]; + const uint8_t *ib_addr = ll_addr; + unsigned int i; + char *p = buf; + + for ( i = 0 ; i < IB_ALEN ; i++ ) { + p += sprintf ( p, ":%02x", ib_addr[i] ); + } + return ( buf + 1 ); +} + +/** Infiniband protocol */ +struct ll_protocol infiniband_protocol __ll_protocol = { + .name = "Infiniband", + .ll_proto = htons ( ARPHRD_INFINIBAND ), + .ll_addr_len = IB_ALEN, + .ll_header_len = IB_HLEN, + .ll_broadcast = ib_broadcast, + .tx = ib_tx, + .rx = ib_rx, + .ntoa = ib_ntoa, +}; From c0d99245a9ad58429ee21bb6a441d965aca34997 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Thu, 13 Sep 2007 01:13:57 +0100 Subject: [PATCH 09/84] Bugfix: DHCP message type should be a one-byte option... (d'oh) --- src/net/udp/dhcp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/net/udp/dhcp.c b/src/net/udp/dhcp.c index 86695f12..9e48fe11 100644 --- a/src/net/udp/dhcp.c +++ b/src/net/udp/dhcp.c @@ -294,8 +294,7 @@ static int copy_dhcp_packet_options ( struct dhcp_packet *dhcppkt, * dhcp_packet structure that can be passed to * set_dhcp_packet_option() or copy_dhcp_packet_options(). */ -static int create_dhcp_packet ( struct net_device *netdev, - unsigned int msgtype, +static int create_dhcp_packet ( struct net_device *netdev, uint8_t msgtype, void *data, size_t max_len, struct dhcp_packet *dhcppkt ) { struct dhcphdr *dhcphdr = data; From 9e32e8e513dec3e6ba99079feef08cec83980800 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Thu, 13 Sep 2007 01:14:56 +0100 Subject: [PATCH 10/84] Now sends IP packets correctly. 
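The fix here is byte order: ib_tx() stores the network-layer protocol into the ibhdr proto field in network byte order, but the lower-level send_bcast_packet()/send_ucast_packet() helpers from the Etherboot IPoIB code expect a host-order protocol number, so the value now passes through ntohs() on the way down. A small, self-contained illustration of that conversion; the cut-down header struct and the 0x0800 IPv4 protocol value are examples, not the driver's actual definitions:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Cut-down stand-in for the driver's ibhdr: 20-byte peer address,
 * 16-bit protocol kept in network byte order. */
struct ibhdr_example {
	uint8_t peer[20];
	uint16_t proto;
	uint16_t reserved;
};

/* Stand-in for send_bcast_packet(), which wants a host-order protocol */
static void send_example(uint16_t proto_host) {
	printf("sending proto 0x%04x\n", proto_host);
}

int main(void) {
	struct ibhdr_example hdr;

	memset(&hdr, 0, sizeof(hdr));
	hdr.proto = htons(0x0800);          /* stored in network byte order */

	send_example(ntohs(hdr.proto));     /* converted before the call    */
	return 0;
}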
--- src/drivers/net/mlx_ipoib/mt25218.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index b9b12c36..360a12fd 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -187,23 +187,16 @@ static int mlx_transmit ( struct net_device *netdev, struct io_buffer *iobuf ) { struct ibhdr *ibhdr = iobuf->data; - DBG ( "Sending packet:\n" ); - // DBG_HD ( iobuf->data, iob_len ( iobuf ) ); - - DBG ( "Peer:\n" ); - DBG_HD ( &ibhdr->peer[0], IB_ALEN ); - DBG ( "Bcast:\n" ); - DBG_HD ( &ib_broadcast[0], IB_ALEN ); - iob_pull ( iobuf, sizeof ( *ibhdr ) ); if ( memcmp ( ibhdr->peer, ib_broadcast, IB_ALEN ) == 0 ) { printf ( "Sending broadcast packet\n" ); - return send_bcast_packet ( ibhdr->proto, iobuf->data, - iob_len ( iobuf ) ); + return send_bcast_packet ( ntohs ( ibhdr->proto ), + iobuf->data, iob_len ( iobuf ) ); } else { printf ( "Sending unicast packet\n" ); - return send_ucast_packet ( ibhdr->peer, ibhdr->proto, + return send_ucast_packet ( ibhdr->peer, + ntohs ( ibhdr->proto ), iobuf->data, iob_len ( iobuf ) ); } } From 7e4e5af462a9f62f74e7a5c49802431248dda8b2 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Thu, 13 Sep 2007 01:53:04 +0100 Subject: [PATCH 11/84] Use RFC4390 whenever hardware address exceeds 16 bytes; this allows us to construct DHCP packets suitable for Infiniband. --- src/include/gpxe/dhcp.h | 7 +++++++ src/net/udp/dhcp.c | 38 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/src/include/gpxe/dhcp.h b/src/include/gpxe/dhcp.h index 43bccf18..18baa86a 100644 --- a/src/include/gpxe/dhcp.h +++ b/src/include/gpxe/dhcp.h @@ -432,6 +432,13 @@ struct dhcphdr { /** Opcode for a reply from server to client */ #define BOOTP_REPLY 2 +/** BOOTP reply must be broadcast + * + * Clients that cannot accept unicast BOOTP replies must set this + * flag. + */ +#define BOOTP_FL_BROADCAST 0x8000 + /** DHCP magic cookie */ #define DHCP_MAGIC_COOKIE 0x63825363UL diff --git a/src/net/udp/dhcp.c b/src/net/udp/dhcp.c index 9e48fe11..8e34ccb6 100644 --- a/src/net/udp/dhcp.c +++ b/src/net/udp/dhcp.c @@ -298,6 +298,7 @@ static int create_dhcp_packet ( struct net_device *netdev, uint8_t msgtype, void *data, size_t max_len, struct dhcp_packet *dhcppkt ) { struct dhcphdr *dhcphdr = data; + unsigned int hlen; int rc; /* Sanity check */ @@ -309,9 +310,17 @@ static int create_dhcp_packet ( struct net_device *netdev, uint8_t msgtype, dhcphdr->xid = dhcp_xid ( netdev ); dhcphdr->magic = htonl ( DHCP_MAGIC_COOKIE ); dhcphdr->htype = ntohs ( netdev->ll_protocol->ll_proto ); - dhcphdr->hlen = netdev->ll_protocol->ll_addr_len; - memcpy ( dhcphdr->chaddr, netdev->ll_addr, dhcphdr->hlen ); dhcphdr->op = dhcp_op[msgtype]; + /* If hardware length exceeds the chaddr field length, don't + * use the chaddr field. This is as per RFC4390. 
+ */ + hlen = netdev->ll_protocol->ll_addr_len; + if ( hlen > sizeof ( dhcphdr->chaddr ) ) { + hlen = 0; + dhcphdr->flags = htons ( BOOTP_FL_BROADCAST ); + } + dhcphdr->hlen = hlen; + memcpy ( dhcphdr->chaddr, netdev->ll_addr, hlen ); /* Initialise DHCP packet structure */ dhcppkt->dhcphdr = dhcphdr; @@ -494,6 +503,14 @@ struct dhcp_netdev_desc { uint16_t device; } __attribute__ (( packed )); +/** DHCP client identifier */ +struct dhcp_client_id { + /** Link-layer protocol */ + uint8_t ll_proto; + /** Link-layer address */ + uint8_t ll_addr[MAX_LL_ADDR_LEN]; +} __attribute__ (( packed )); + /** * Create DHCP request * @@ -511,7 +528,9 @@ int create_dhcp_request ( struct net_device *netdev, int msgtype, struct dhcp_packet *dhcppkt ) { struct device_description *desc = &netdev->dev->desc; struct dhcp_netdev_desc dhcp_desc; + struct dhcp_client_id client_id; size_t dhcp_features_len; + size_t ll_addr_len; int rc; /* Create DHCP packet */ @@ -570,6 +589,21 @@ int create_dhcp_request ( struct net_device *netdev, int msgtype, return rc; } + /* Add DHCP client identifier. Required for Infiniband, and + * doesn't hurt other link layers. + */ + client_id.ll_proto = netdev->ll_protocol->ll_proto; + ll_addr_len = netdev->ll_protocol->ll_addr_len; + assert ( ll_addr_len <= sizeof ( client_id.ll_addr ) ); + memcpy ( client_id.ll_addr, netdev->ll_addr, ll_addr_len ); + if ( ( rc = set_dhcp_packet_option ( dhcppkt, DHCP_CLIENT_ID, + &client_id, + ( ll_addr_len + 1 ) ) ) != 0 ) { + DBG ( "DHCP could not set client ID: %s\n", + strerror ( rc ) ); + return rc; + } + return 0; } From 03c90e183f5a90327205efb5a88d5c2924236cf3 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Thu, 13 Sep 2007 03:15:27 +0100 Subject: [PATCH 12/84] Dump received packet, including GRH. --- src/drivers/net/mlx_ipoib/mt25218.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 360a12fd..de91dac6 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -246,7 +246,13 @@ static void mlx_poll ( struct net_device *netdev ) { free_wqe ( ib_cqe.wqe ); return; } + buf = get_rcv_wqe_buf(ib_cqe.wqe, 1); memcpy ( iob_put ( iobuf, len ), buf, len ); + DBG ( "Received packet header:\n" ); + struct recv_wqe_st *rcv_wqe = ib_cqe.wqe; + DBG_HD ( get_rcv_wqe_buf(ib_cqe.wqe, 0), + be32_to_cpu(rcv_wqe->mpointer[0].byte_count) ); + DBG ( "Received packet:\n" ); DBG_HD ( iobuf->data, iob_len ( iobuf ) ); From 30a19c3f1c51ff404d8de3196c4355fba3083c8e Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Thu, 13 Sep 2007 14:43:12 +0100 Subject: [PATCH 13/84] Can now both send and receive packets. LL header format not yet fixed; still using a quick hack-up just to be able to pass through data. 
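
For reference, the 20-byte link-layer address introduced here follows the
IPoIB convention: a 32-bit queue pair number (top byte zero, since QPNs
are only 24 bits wide) followed by the 16-byte port GID.  A sketch of how
the probe routine fills it in, using struct ib_mac as defined in the diff
below together with the existing ipoib_data fields:

    struct ib_mac *mac = ( struct ib_mac * ) netdev->ll_addr;

    /* 24-bit queue pair number, stored big-endian with MSB zero */
    mac->qpn = htonl ( ipoib_data.ipoib_qpn );
    /* 16-byte port GID */
    memcpy ( &mac->gid, ipoib_data.port_gid_raw, sizeof ( mac->gid ) );

This is also the address that gets copied into the DHCP client identifier
option added earlier in the series.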
--- src/drivers/net/mlx_ipoib/ib_mt25218.c | 2 ++ src/drivers/net/mlx_ipoib/mt25218.c | 17 +++++++----- src/include/gpxe/infiniband.h | 37 ++++++++++++++++++++++++++ src/net/infiniband.c | 11 ++++++++ 4 files changed, 60 insertions(+), 7 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/ib_mt25218.c b/src/drivers/net/mlx_ipoib/ib_mt25218.c index 631a95cb..dcd49e45 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt25218.c +++ b/src/drivers/net/mlx_ipoib/ib_mt25218.c @@ -1587,12 +1587,14 @@ static void prep_send_wqe_buf(void *qph, } snd_wqe->mpointer[0].byte_count = cpu_to_be32(len); +#if 0 DBG ( "prep_send_wqe_buf()\n" ); DBG ( "snd_wqe:\n" ); DBG_HD ( snd_wqe, sizeof ( *snd_wqe ) ); DBG ( "packet:\n" ); DBG_HD ( bus_to_virt(be32_to_cpu(snd_wqe->mpointer[0].local_addr_l)), len ); +#endif } static void *alloc_ud_av(void) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index de91dac6..a468f160 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -248,13 +248,13 @@ static void mlx_poll ( struct net_device *netdev ) { } buf = get_rcv_wqe_buf(ib_cqe.wqe, 1); memcpy ( iob_put ( iobuf, len ), buf, len ); - DBG ( "Received packet header:\n" ); - struct recv_wqe_st *rcv_wqe = ib_cqe.wqe; - DBG_HD ( get_rcv_wqe_buf(ib_cqe.wqe, 0), - be32_to_cpu(rcv_wqe->mpointer[0].byte_count) ); + // DBG ( "Received packet header:\n" ); + // struct recv_wqe_st *rcv_wqe = ib_cqe.wqe; + // DBG_HD ( get_rcv_wqe_buf(ib_cqe.wqe, 0), + // be32_to_cpu(rcv_wqe->mpointer[0].byte_count) ); - DBG ( "Received packet:\n" ); - DBG_HD ( iobuf->data, iob_len ( iobuf ) ); + // DBG ( "Received packet:\n" ); + // DBG_HD ( iobuf->data, iob_len ( iobuf ) ); netdev_rx ( netdev, iobuf ); @@ -392,6 +392,7 @@ static int mlx_probe ( struct pci_device *pci, const struct pci_device_id *id __unused ) { struct net_device *netdev; struct mlx_nic *mlx; + struct ib_mac *mac; int rc; /* Allocate net device */ @@ -410,7 +411,9 @@ static int mlx_probe ( struct pci_device *pci, /* Initialise hardware */ if ( ( rc = ipoib_init ( pci ) ) != 0 ) goto err_ipoib_init; - memcpy ( netdev->ll_addr, ipoib_data.port_gid_raw, IB_ALEN ); + mac = ( ( struct ib_mac * ) netdev->ll_addr ); + mac->qpn = htonl ( ipoib_data.ipoib_qpn ); + memcpy ( &mac->gid, ipoib_data.port_gid_raw, sizeof ( mac->gid ) ); /* Register network device */ if ( ( rc = register_netdev ( netdev ) ) != 0 ) diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 126113a7..11cec189 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -14,6 +14,43 @@ #define IB_ALEN 20 #define IB_HLEN 24 +/** An Infiniband Global Identifier */ +struct ib_gid { + uint8_t bytes[16]; +}; + +/** An Infiniband Global Route Header */ +struct ib_global_route_header { + /** IP version, traffic class, and flow label + * + * 4 bits : Version of the GRH + * 8 bits : Traffic class + * 20 bits : Flow label + */ + uint32_t ipver_tclass_flowlabel; + /** Payload length */ + uint16_t paylen; + /** Next header */ + uint8_t nxthdr; + /** Hop limit */ + uint8_t hoplmt; + /** Source GID */ + struct ib_gid sgid; + /** Destiniation GID */ + struct ib_gid dgid; +} __attribute__ (( packed )); + +/** An Infiniband MAC address */ +struct ib_mac { + /** Queue pair number + * + * MSB must be zero; QPNs are only 24-bit. 
+ */ + uint32_t qpn; + /** Port GID */ + struct ib_gid gid; +} __attribute__ (( packed )); + /** An Infiniband header * * This data structure doesn't represent the on-wire format, but does diff --git a/src/net/infiniband.c b/src/net/infiniband.c index bcfac292..c7fabd0e 100644 --- a/src/net/infiniband.c +++ b/src/net/infiniband.c @@ -70,6 +70,17 @@ static int ib_tx ( struct io_buffer *iobuf, struct net_device *netdev, * network-layer protocol. */ static int ib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) { + + struct { + uint16_t proto; + uint16_t reserved; + } * header = iobuf->data; + + iob_pull ( iobuf, sizeof ( *header ) ); + return net_rx ( iobuf, netdev, header->proto, NULL ); + + + struct ibhdr *ibhdr = iobuf->data; /* Sanity check */ From da23e8d287ec8d6c9a40addab75cfb06465d4c83 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Thu, 13 Sep 2007 17:47:14 +0100 Subject: [PATCH 14/84] Start constructing a generic poll() routine. --- src/drivers/net/mlx_ipoib/mt25218.c | 143 +++++++++++++++++------- src/drivers/net/mlx_ipoib/mt25218_imp.c | 2 + 2 files changed, 104 insertions(+), 41 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index a468f160..17a68c88 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -52,6 +52,7 @@ int prompt_key(int secs, unsigned char *ch_p) return 0; } +#if 0 /************************************************************************** IRQ - handle interrupts ***************************************************************************/ @@ -153,6 +154,7 @@ static void mt25218_transmit(struct nic *nic, const char *dest, /* Destination * eprintf("tranmit error"); } } +#endif /** * Open network device @@ -161,6 +163,9 @@ static void mt25218_transmit(struct nic *nic, const char *dest, /* Destination * * @ret rc Return status code */ static int mlx_open ( struct net_device *netdev ) { + + ( void ) netdev; + return 0; } @@ -170,6 +175,9 @@ static int mlx_open ( struct net_device *netdev ) { * @v netdev Network device */ static void mlx_close ( struct net_device *netdev ) { + + ( void ) netdev; + } #warning "Broadcast address?" @@ -187,6 +195,8 @@ static int mlx_transmit ( struct net_device *netdev, struct io_buffer *iobuf ) { struct ibhdr *ibhdr = iobuf->data; + ( void ) netdev; + iob_pull ( iobuf, sizeof ( *ibhdr ) ); if ( memcmp ( ibhdr->peer, ib_broadcast, IB_ALEN ) == 0 ) { @@ -201,17 +211,95 @@ static int mlx_transmit ( struct net_device *netdev, } } +/** + * Handle TX completion + * + * @v netdev Network device + * @v cqe Completion queue entry + */ +static void mlx_tx_complete ( struct net_device *netdev, + struct ib_cqe_st *cqe ) { + netdev_tx_complete_next_err ( netdev, + ( cqe->is_error ? -EIO : 0 ) ); +} + +/** + * Handle RX completion + * + * @v netdev Network device + * @v cqe Completion queue entry + */ +static void mlx_rx_complete ( struct net_device *netdev, + struct ib_cqe_st *cqe ) { + unsigned int len; + struct io_buffer *iobuf; + void *buf; + + /* Check for errors */ + if ( cqe->is_error ) { + netdev_rx_err ( netdev, NULL, -EIO ); + return; + } + + /* Allocate I/O buffer */ + len = cqe->count; + iobuf = alloc_iob ( len ); + if ( ! 
iobuf ) { + netdev_rx_err ( netdev, NULL, -ENOMEM ); + return; + } + buf = get_rcv_wqe_buf ( cqe->wqe, 1 ); + memcpy ( iob_put ( iobuf, len ), buf, len ); + // DBG ( "Received packet header:\n" ); + // struct recv_wqe_st *rcv_wqe = ib_cqe.wqe; + // DBG_HD ( get_rcv_wqe_buf(ib_cqe.wqe, 0), + // be32_to_cpu(rcv_wqe->mpointer[0].byte_count) ); + // DBG ( "Received packet:\n" ); + // DBG_HD ( iobuf->data, iob_len ( iobuf ) ); + netdev_rx ( netdev, iobuf ); +} + +/** + * Poll completion queue + * + * @v netdev Network device + * @v cq Completion queue + */ +static void mlx_poll_cq ( struct net_device *netdev, cq_t cq ) { + struct mlx_nic *mlx = netdev->priv; + struct ib_cqe_st cqe; + uint8_t num_cqes; + + while ( 1 ) { + /* Poll for single completion queue entry */ + ib_poll_cq ( cq, &cqe, &num_cqes ); + + /* Return if no entries in the queue */ + if ( ! num_cqes ) + return; + + DBGC ( mlx, "MLX %p cpl in %p: err %x send %x " + "wqe %p count %lx\n", mlx, cq, cqe.is_error, + cqe.is_send, cqe.wqe, cqe.count ); + + /* Handle TX/RX completion */ + if ( cqe.is_send ) { + mlx_tx_complete ( netdev, &cqe ); + } else { + mlx_rx_complete ( netdev, &cqe ); + } + + /* Free associated work queue entry */ + free_wqe ( cqe.wqe ); + } +} + /** * Poll for completed and received packets * * @v netdev Network device */ static void mlx_poll ( struct net_device *netdev ) { - struct ib_cqe_st ib_cqe; - uint8_t num_cqes; - unsigned int len; - struct io_buffer *iobuf; - void *buf; int rc; if ( ( rc = poll_error_buf() ) != 0 ) { @@ -224,41 +312,7 @@ static void mlx_poll ( struct net_device *netdev ) { return; } - if ( ( rc = ib_poll_cq ( ipoib_data.rcv_cqh, &ib_cqe, - &num_cqes ) ) != 0 ) { - DBG ( "ib_poll_cq() failed: %s\n", strerror ( rc ) ); - return; - } - - if ( ! num_cqes ) - return; - - if ( ib_cqe.is_error ) { - DBG ( "cqe error\n" ); - free_wqe ( ib_cqe.wqe ); - return; - } - - len = ib_cqe.count; - iobuf = alloc_iob ( len ); - if ( ! 
iobuf ) { - DBG ( "out of memory\n" ); - free_wqe ( ib_cqe.wqe ); - return; - } - buf = get_rcv_wqe_buf(ib_cqe.wqe, 1); - memcpy ( iob_put ( iobuf, len ), buf, len ); - // DBG ( "Received packet header:\n" ); - // struct recv_wqe_st *rcv_wqe = ib_cqe.wqe; - // DBG_HD ( get_rcv_wqe_buf(ib_cqe.wqe, 0), - // be32_to_cpu(rcv_wqe->mpointer[0].byte_count) ); - - // DBG ( "Received packet:\n" ); - // DBG_HD ( iobuf->data, iob_len ( iobuf ) ); - - netdev_rx ( netdev, iobuf ); - - free_wqe ( ib_cqe.wqe ); + mlx_poll_cq ( netdev, ipoib_data.rcv_cqh ); } /** @@ -268,6 +322,10 @@ static void mlx_poll ( struct net_device *netdev ) { * @v enable Interrupts should be enabled */ static void mlx_irq ( struct net_device *netdev, int enable ) { + + ( void ) netdev; + ( void ) enable; + } static struct net_device_operations mlx_operations = { @@ -278,6 +336,7 @@ static struct net_device_operations mlx_operations = { .irq = mlx_irq, }; +#if 0 /************************************************************************** DISABLE - Turn off ethernet interface ***************************************************************************/ @@ -297,6 +356,7 @@ static void mt25218_disable(struct nic *nic) disable_imp(); } } +#endif /** * Remove PCI device @@ -305,7 +365,6 @@ static void mt25218_disable(struct nic *nic) */ static void mlx_remove ( struct pci_device *pci ) { struct net_device *netdev = pci_get_drvdata ( pci ); - struct mlx_nic *mlx = netdev->priv; unregister_netdev ( netdev ); ipoib_close(0); @@ -313,6 +372,7 @@ static void mlx_remove ( struct pci_device *pci ) { netdev_put ( netdev ); } +#if 0 static struct nic_operations mt25218_operations = { .connect = dummy_connect, .poll = mt25218_poll, @@ -380,6 +440,7 @@ static int mt25218_probe(struct nic *nic, struct pci_device *pci) /* else */ return 0; } +#endif /** * Probe PCI device diff --git a/src/drivers/net/mlx_ipoib/mt25218_imp.c b/src/drivers/net/mlx_ipoib/mt25218_imp.c index 301f31df..efa37948 100644 --- a/src/drivers/net/mlx_ipoib/mt25218_imp.c +++ b/src/drivers/net/mlx_ipoib/mt25218_imp.c @@ -45,6 +45,7 @@ static void be_to_cpu_buf(void *buf, int size) #include "ib_driver.c" #include "ipoib.c" +#if 0 static int probe_imp(struct pci_device *pci, struct nic *nic) { int rc; @@ -232,3 +233,4 @@ fatal_handling: return -1; } +#endif From e9df4f691e8c3fce1a39464c0612ee627e545087 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Fri, 14 Sep 2007 10:17:29 +0100 Subject: [PATCH 15/84] Add EX_FLD_BE() --- src/drivers/net/mlx_ipoib/bit_ops.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/drivers/net/mlx_ipoib/bit_ops.h b/src/drivers/net/mlx_ipoib/bit_ops.h index e3fb4331..b67f92ce 100644 --- a/src/drivers/net/mlx_ipoib/bit_ops.h +++ b/src/drivers/net/mlx_ipoib/bit_ops.h @@ -80,6 +80,14 @@ struct addr_64_st { */ #define MT_EXTRACT_ARRAY32(A,O,S) MT_EXTRACT32(((__u32*)A)[O >> 5],(O & MASK32(5)),S) +/* + * MT_EXTRACT_ARRAY32_BE macro is similar to EXTRACT but works on an array of (__u32), + * thus offset may be larger than 32 (but not size). + * + * (added by mcb30) + */ +#define MT_EXTRACT_ARRAY32_BE(A,O,S) MT_EXTRACT32(be32_to_cpu(((__u32*)A)[O >> 5]),(O & MASK32(5)),S) + /* * MT_INSERT_ARRAY32 macro is similar to INSERT but works on an array of (__u32), * thus offset may be larger than 32 (but not size). 
@@ -90,6 +98,8 @@ struct addr_64_st { #define EX_FLD(a, st, fld) MT_EXTRACT_ARRAY32(a, MT_BIT_OFFSET(st, fld), MT_BIT_SIZE(st, fld)) +#define EX_FLD_BE(a, st, fld) MT_EXTRACT_ARRAY32_BE(a, MT_BIT_OFFSET(st, fld), MT_BIT_SIZE(st, fld)) + /* return the address of the dword holding the field buf = pointer to buffer where to place the value From e69863b5fb70422159e08c3c2d2e815bc941b92a Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Fri, 14 Sep 2007 10:18:09 +0100 Subject: [PATCH 16/84] Proof-of-concept to manually parse completion event --- src/drivers/net/mlx_ipoib/mt25218.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 17a68c88..810a479a 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -27,7 +27,6 @@ struct mlx_nic { #include "mt_version.c" #include "mt25218_imp.c" -/* NIC specific static variables go here */ int prompt_key(int secs, unsigned char *ch_p) { @@ -265,12 +264,31 @@ static void mlx_rx_complete ( struct net_device *netdev, * @v netdev Network device * @v cq Completion queue */ -static void mlx_poll_cq ( struct net_device *netdev, cq_t cq ) { +static void mlx_poll_cq ( struct net_device *netdev, + struct cq_st *cq ) { struct mlx_nic *mlx = netdev->priv; struct ib_cqe_st cqe; uint8_t num_cqes; while ( 1 ) { + + unsigned long cons_idx; + union cqe_st *temp; + + cons_idx = ( cq->cons_counter & ( cq->num_cqes - 1 ) ); + temp = &cq->cq_buf[cons_idx]; + if ( EX_FLD_BE ( temp, arbelprm_completion_queue_entry_st, + owner ) == 0 ) { + DBG ( "software owned\n" ); + DBGC_HD ( mlx, temp, sizeof ( *temp ) ); + DBG ( "my_qpn=%lx, g=%ld, s=%ld, op=%02lx, cnt=%lx\n", + EX_FLD_BE ( temp, arbelprm_completion_queue_entry_st, my_qpn ), + EX_FLD_BE ( temp, arbelprm_completion_queue_entry_st, g ), + EX_FLD_BE ( temp, arbelprm_completion_queue_entry_st, s ), + EX_FLD_BE ( temp, arbelprm_completion_queue_entry_st, opcode ), + EX_FLD_BE ( temp, arbelprm_completion_queue_entry_st, byte_cnt ) ); + } + /* Poll for single completion queue entry */ ib_poll_cq ( cq, &cqe, &num_cqes ); @@ -312,6 +330,7 @@ static void mlx_poll ( struct net_device *netdev ) { return; } + // mlx_poll_cq ( netdev, ipoib_data.snd_cqh ); mlx_poll_cq ( netdev, ipoib_data.rcv_cqh ); } From 08e8dfd801afd35f2f006520b1df78d05de1921a Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Fri, 14 Sep 2007 11:10:25 +0100 Subject: [PATCH 17/84] Now handling TX completions in our poll loop. 
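
In outline, the generic completion path added below polls a completion
queue, hands each entry to a TX- or RX-specific handler, and then returns
the associated work queue entry to the free list.  A condensed sketch of
the loop, using only helpers that already exist in the legacy driver code:

    struct ib_cqe_st ib_cqe;
    uint8_t num_cqes;

    while ( 1 ) {
        /* Poll for a single completion queue entry */
        ib_poll_cq ( cq, &ib_cqe, &num_cqes );
        if ( ! num_cqes )
            break;
        /* Dispatch to the TX or RX completion handler */
        handler ( netdev, &ib_cqe );
        /* Return the work queue entry to the free list */
        free_wqe ( ib_cqe.wqe );
    }

The send and receive completion queues are polled separately, with
mlx_tx_complete() and mlx_rx_complete() passed in as the handler.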
--- src/drivers/net/mlx_ipoib/mt25218.c | 113 ++++++++++++++++------------ src/include/gpxe/infiniband.h | 19 ++--- src/net/infiniband.c | 17 +---- 3 files changed, 74 insertions(+), 75 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 810a479a..020f9294 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -16,9 +16,6 @@ Skeleton NIC driver for Etherboot #include #include -struct mlx_nic { -}; - /* to get some global routines like printf */ #include "etherboot.h" /* to get the interface to the body of the program */ @@ -27,6 +24,16 @@ struct mlx_nic { #include "mt_version.c" #include "mt25218_imp.c" +struct mlx_nic { + /** Queue pair handle */ + udqp_t ipoib_qph; + /** Broadcast Address Vector */ + ud_av_t bcast_av; + /** Send completion queue */ + cq_t snd_cqh; + /** Receive completion queue */ + cq_t rcv_cqh; +}; int prompt_key(int secs, unsigned char *ch_p) { @@ -192,8 +199,28 @@ static uint8_t ib_broadcast[IB_ALEN] = { 0xff, }; */ static int mlx_transmit ( struct net_device *netdev, struct io_buffer *iobuf ) { - struct ibhdr *ibhdr = iobuf->data; + struct mlx_nic *mlx = netdev->priv; + ud_send_wqe_t snd_wqe; + int rc; + snd_wqe = alloc_send_wqe ( mlx->ipoib_qph ); + if ( ! snd_wqe ) { + DBGC ( mlx, "MLX %p out of TX WQEs\n", mlx ); + return -ENOBUFS; + } + + prep_send_wqe_buf ( mlx->ipoib_qph, mlx->bcast_av, snd_wqe, + iobuf->data, 0, iob_len ( iobuf ), 0 ); + if ( ( rc = post_send_req ( mlx->ipoib_qph, snd_wqe, 1 ) ) != 0 ) { + DBGC ( mlx, "MLX %p could not post TX WQE %p: %s\n", + mlx, snd_wqe, strerror ( rc ) ); + free_wqe ( snd_wqe ); + return rc; + } + + return 0; + +#if 0 ( void ) netdev; iob_pull ( iobuf, sizeof ( *ibhdr ) ); @@ -208,46 +235,47 @@ static int mlx_transmit ( struct net_device *netdev, ntohs ( ibhdr->proto ), iobuf->data, iob_len ( iobuf ) ); } +#endif } /** * Handle TX completion * * @v netdev Network device - * @v cqe Completion queue entry + * @v ib_cqe Completion queue entry */ static void mlx_tx_complete ( struct net_device *netdev, - struct ib_cqe_st *cqe ) { + struct ib_cqe_st *ib_cqe ) { netdev_tx_complete_next_err ( netdev, - ( cqe->is_error ? -EIO : 0 ) ); + ( ib_cqe->is_error ? -EIO : 0 ) ); } /** * Handle RX completion * * @v netdev Network device - * @v cqe Completion queue entry + * @v ib_cqe Completion queue entry */ static void mlx_rx_complete ( struct net_device *netdev, - struct ib_cqe_st *cqe ) { + struct ib_cqe_st *ib_cqe ) { unsigned int len; struct io_buffer *iobuf; void *buf; /* Check for errors */ - if ( cqe->is_error ) { + if ( ib_cqe->is_error ) { netdev_rx_err ( netdev, NULL, -EIO ); return; } /* Allocate I/O buffer */ - len = cqe->count; + len = ( ib_cqe->count - GRH_SIZE ); iobuf = alloc_iob ( len ); if ( ! 
iobuf ) { netdev_rx_err ( netdev, NULL, -ENOMEM ); return; } - buf = get_rcv_wqe_buf ( cqe->wqe, 1 ); + buf = get_rcv_wqe_buf ( ib_cqe->wqe, 1 ); memcpy ( iob_put ( iobuf, len ), buf, len ); // DBG ( "Received packet header:\n" ); // struct recv_wqe_st *rcv_wqe = ib_cqe.wqe; @@ -263,52 +291,33 @@ static void mlx_rx_complete ( struct net_device *netdev, * * @v netdev Network device * @v cq Completion queue + * @v handler Completion handler */ -static void mlx_poll_cq ( struct net_device *netdev, - struct cq_st *cq ) { +static void mlx_poll_cq ( struct net_device *netdev, cq_t cq, + void ( * handler ) ( struct net_device *netdev, + struct ib_cqe_st *ib_cqe ) ) { struct mlx_nic *mlx = netdev->priv; - struct ib_cqe_st cqe; + struct ib_cqe_st ib_cqe; uint8_t num_cqes; while ( 1 ) { - unsigned long cons_idx; - union cqe_st *temp; - - cons_idx = ( cq->cons_counter & ( cq->num_cqes - 1 ) ); - temp = &cq->cq_buf[cons_idx]; - if ( EX_FLD_BE ( temp, arbelprm_completion_queue_entry_st, - owner ) == 0 ) { - DBG ( "software owned\n" ); - DBGC_HD ( mlx, temp, sizeof ( *temp ) ); - DBG ( "my_qpn=%lx, g=%ld, s=%ld, op=%02lx, cnt=%lx\n", - EX_FLD_BE ( temp, arbelprm_completion_queue_entry_st, my_qpn ), - EX_FLD_BE ( temp, arbelprm_completion_queue_entry_st, g ), - EX_FLD_BE ( temp, arbelprm_completion_queue_entry_st, s ), - EX_FLD_BE ( temp, arbelprm_completion_queue_entry_st, opcode ), - EX_FLD_BE ( temp, arbelprm_completion_queue_entry_st, byte_cnt ) ); - } - /* Poll for single completion queue entry */ - ib_poll_cq ( cq, &cqe, &num_cqes ); + ib_poll_cq ( cq, &ib_cqe, &num_cqes ); /* Return if no entries in the queue */ if ( ! num_cqes ) return; DBGC ( mlx, "MLX %p cpl in %p: err %x send %x " - "wqe %p count %lx\n", mlx, cq, cqe.is_error, - cqe.is_send, cqe.wqe, cqe.count ); + "wqe %p count %lx\n", mlx, cq, ib_cqe.is_error, + ib_cqe.is_send, ib_cqe.wqe, ib_cqe.count ); /* Handle TX/RX completion */ - if ( cqe.is_send ) { - mlx_tx_complete ( netdev, &cqe ); - } else { - mlx_rx_complete ( netdev, &cqe ); - } - + handler ( netdev, &ib_cqe ); + /* Free associated work queue entry */ - free_wqe ( cqe.wqe ); + free_wqe ( ib_cqe.wqe ); } } @@ -318,6 +327,7 @@ static void mlx_poll_cq ( struct net_device *netdev, * @v netdev Network device */ static void mlx_poll ( struct net_device *netdev ) { + struct mlx_nic *mlx = netdev->priv; int rc; if ( ( rc = poll_error_buf() ) != 0 ) { @@ -330,8 +340,8 @@ static void mlx_poll ( struct net_device *netdev ) { return; } - // mlx_poll_cq ( netdev, ipoib_data.snd_cqh ); - mlx_poll_cq ( netdev, ipoib_data.rcv_cqh ); + mlx_poll_cq ( netdev, mlx->snd_cqh, mlx_tx_complete ); + mlx_poll_cq ( netdev, mlx->rcv_cqh, mlx_rx_complete ); } /** @@ -386,7 +396,7 @@ static void mlx_remove ( struct pci_device *pci ) { struct net_device *netdev = pci_get_drvdata ( pci ); unregister_netdev ( netdev ); - ipoib_close(0); + ib_driver_close ( 0 ); netdev_nullify ( netdev ); netdev_put ( netdev ); } @@ -473,6 +483,7 @@ static int mlx_probe ( struct pci_device *pci, struct net_device *netdev; struct mlx_nic *mlx; struct ib_mac *mac; + udqp_t qph; int rc; /* Allocate net device */ @@ -489,11 +500,15 @@ static int mlx_probe ( struct pci_device *pci, adjust_pci_device ( pci ); /* Initialise hardware */ - if ( ( rc = ipoib_init ( pci ) ) != 0 ) + if ( ( rc = ib_driver_init ( pci, &qph ) ) != 0 ) goto err_ipoib_init; + mlx->ipoib_qph = qph; + mlx->bcast_av = ib_data.bcast_av; + mlx->snd_cqh = ib_data.ipoib_snd_cq; + mlx->rcv_cqh = ib_data.ipoib_rcv_cq; mac = ( ( struct ib_mac * ) netdev->ll_addr ); - 
mac->qpn = htonl ( ipoib_data.ipoib_qpn ); - memcpy ( &mac->gid, ipoib_data.port_gid_raw, sizeof ( mac->gid ) ); + mac->qpn = htonl ( ib_get_qpn ( mlx->ipoib_qph ) ); + memcpy ( &mac->gid, ib_data.port_gid.raw, sizeof ( mac->gid ) ); /* Register network device */ if ( ( rc = register_netdev ( netdev ) ) != 0 ) @@ -503,7 +518,7 @@ static int mlx_probe ( struct pci_device *pci, err_register_netdev: err_ipoib_init: - ipoib_close(0); + ib_driver_close ( 0 ); netdev_nullify ( netdev ); netdev_put ( netdev ); return rc; diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 11cec189..9f126b49 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -10,10 +10,6 @@ #include #include -/** Infiniband hardware address length */ -#define IB_ALEN 20 -#define IB_HLEN 24 - /** An Infiniband Global Identifier */ struct ib_gid { uint8_t bytes[16]; @@ -40,6 +36,9 @@ struct ib_global_route_header { struct ib_gid dgid; } __attribute__ (( packed )); +/** Infiniband MAC address length */ +#define IB_ALEN 20 + /** An Infiniband MAC address */ struct ib_mac { /** Queue pair number @@ -51,15 +50,11 @@ struct ib_mac { struct ib_gid gid; } __attribute__ (( packed )); -/** An Infiniband header - * - * This data structure doesn't represent the on-wire format, but does - * contain all the information required by the driver to construct the - * packet. - */ +/** Infiniband link-layer header length */ +#define IB_HLEN 4 + +/** An Infiniband link-layer header */ struct ibhdr { - /** Peer address */ - uint8_t peer[IB_ALEN]; /** Network-layer protocol */ uint16_t proto; /** Reserved, must be zero */ diff --git a/src/net/infiniband.c b/src/net/infiniband.c index c7fabd0e..52811b92 100644 --- a/src/net/infiniband.c +++ b/src/net/infiniband.c @@ -50,12 +50,12 @@ static int ib_tx ( struct io_buffer *iobuf, struct net_device *netdev, struct net_protocol *net_protocol, const void *ll_dest ) { struct ibhdr *ibhdr = iob_push ( iobuf, sizeof ( *ibhdr ) ); - /* Build Infiniband header */ - memcpy ( ibhdr->peer, ll_dest, IB_ALEN ); ibhdr->proto = net_protocol->net_proto; ibhdr->reserved = 0; + ( void ) ll_dest; + /* Hand off to network device */ return netdev_tx ( netdev, iobuf ); } @@ -70,17 +70,6 @@ static int ib_tx ( struct io_buffer *iobuf, struct net_device *netdev, * network-layer protocol. 
*/ static int ib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) { - - struct { - uint16_t proto; - uint16_t reserved; - } * header = iobuf->data; - - iob_pull ( iobuf, sizeof ( *header ) ); - return net_rx ( iobuf, netdev, header->proto, NULL ); - - - struct ibhdr *ibhdr = iobuf->data; /* Sanity check */ @@ -95,7 +84,7 @@ static int ib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) { iob_pull ( iobuf, sizeof ( *ibhdr ) ); /* Hand off to network-layer protocol */ - return net_rx ( iobuf, netdev, ibhdr->proto, ibhdr->peer ); + return net_rx ( iobuf, netdev, ibhdr->proto, NULL ); } /** From 75fbc96f754c8cf66b32c52f7b6678308dfdbc26 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Fri, 14 Sep 2007 11:23:06 +0100 Subject: [PATCH 18/84] Remove some dead code --- src/drivers/net/mlx_ipoib/mt25218.c | 255 ++-------------------------- 1 file changed, 10 insertions(+), 245 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 020f9294..3cbca49a 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -21,7 +21,6 @@ Skeleton NIC driver for Etherboot /* to get the interface to the body of the program */ #include "nic.h" -#include "mt_version.c" #include "mt25218_imp.c" struct mlx_nic { @@ -35,133 +34,6 @@ struct mlx_nic { cq_t rcv_cqh; }; -int prompt_key(int secs, unsigned char *ch_p) -{ - unsigned long tmo; - unsigned char ch; - - for (tmo = currticks() + secs * TICKS_PER_SEC; currticks() < tmo;) { - if (iskey()) { - ch = getchar(); - /* toupper does not work ... */ - if (ch == 'v') - ch = 'V'; - if (ch == 'i') - ch = 'I'; - if ((ch=='V') || (ch=='I')) { - *ch_p = ch; - return 1; - } - } - } - - return 0; -} - -#if 0 -/************************************************************************** -IRQ - handle interrupts -***************************************************************************/ -static void mt25218_irq(struct nic *nic, irq_action_t action) -{ - /* This routine is somewhat optional. Etherboot itself - * doesn't use interrupts, but they are required under some - * circumstances when we're acting as a PXE stack. - * - * If you don't implement this routine, the only effect will - * be that your driver cannot be used via Etherboot's UNDI - * API. This won't affect programs that use only the UDP - * portion of the PXE API, such as pxelinux. - */ - - if (0) { - nic = NULL; - } - switch (action) { - case DISABLE: - case ENABLE: - /* Set receive interrupt enabled/disabled state */ - /* - outb ( action == ENABLE ? IntrMaskEnabled : IntrMaskDisabled, - nic->ioaddr + IntrMaskRegister ); - */ - break; - case FORCE: - /* Force NIC to generate a receive interrupt */ - /* - outb ( ForceInterrupt, nic->ioaddr + IntrForceRegister ); - */ - break; - } -} - -/************************************************************************** -POLL - Wait for a frame -***************************************************************************/ -static int mt25218_poll(struct nic *nic, int retrieve) -{ - /* Work out whether or not there's an ethernet packet ready to - * read. Return 0 if not. - */ - /* - if ( ! ) return 0; - */ - - /* retrieve==0 indicates that we are just checking for the - * presence of a packet but don't want to read it just yet. - */ - /* - if ( ! retrieve ) return 1; - */ - - /* Copy data to nic->packet. Data should include the - * link-layer header (dest MAC, source MAC, type). - * Store length of data in nic->packetlen. - * Return true to indicate a packet has been read. 
- */ - /* - nic->packetlen = ; - memcpy ( nic->packet, , ); - return 1; - */ - unsigned int size; - int rc; - rc = poll_imp(nic, retrieve, &size); - if (rc) { - return 0; - } - - if (size == 0) { - return 0; - } - - nic->packetlen = size; - - return 1; -} - -/************************************************************************** -TRANSMIT - Transmit a frame -***************************************************************************/ -static void mt25218_transmit(struct nic *nic, const char *dest, /* Destination */ - unsigned int type, /* Type */ - unsigned int size, /* size */ - const char *packet) -{ /* Packet */ - int rc; - - /* Transmit packet to dest MAC address. You will need to - * construct the link-layer header (dest MAC, source MAC, - * type). - */ - if (nic) { - rc = transmit_imp(dest, type, packet, size); - if (rc) - eprintf("tranmit error"); - } -} -#endif - /** * Open network device * @@ -219,23 +91,6 @@ static int mlx_transmit ( struct net_device *netdev, } return 0; - -#if 0 - ( void ) netdev; - - iob_pull ( iobuf, sizeof ( *ibhdr ) ); - - if ( memcmp ( ibhdr->peer, ib_broadcast, IB_ALEN ) == 0 ) { - printf ( "Sending broadcast packet\n" ); - return send_bcast_packet ( ntohs ( ibhdr->proto ), - iobuf->data, iob_len ( iobuf ) ); - } else { - printf ( "Sending unicast packet\n" ); - return send_ucast_packet ( ibhdr->peer, - ntohs ( ibhdr->proto ), - iobuf->data, iob_len ( iobuf ) ); - } -#endif } /** @@ -275,14 +130,12 @@ static void mlx_rx_complete ( struct net_device *netdev, netdev_rx_err ( netdev, NULL, -ENOMEM ); return; } + + /* Fill I/O buffer */ buf = get_rcv_wqe_buf ( ib_cqe->wqe, 1 ); memcpy ( iob_put ( iobuf, len ), buf, len ); - // DBG ( "Received packet header:\n" ); - // struct recv_wqe_st *rcv_wqe = ib_cqe.wqe; - // DBG_HD ( get_rcv_wqe_buf(ib_cqe.wqe, 0), - // be32_to_cpu(rcv_wqe->mpointer[0].byte_count) ); - // DBG ( "Received packet:\n" ); - // DBG_HD ( iobuf->data, iob_len ( iobuf ) ); + + /* Hand off to network stack */ netdev_rx ( netdev, iobuf ); } @@ -335,11 +188,15 @@ static void mlx_poll ( struct net_device *netdev ) { return; } + /* Drain event queue. We can ignore events, since we're going + * to just poll all completion queues anyway. + */ if ( ( rc = drain_eq() ) != 0 ) { DBG ( "drain_eq() failed: %s\n", strerror ( rc ) ); return; } + /* Poll completion queues */ mlx_poll_cq ( netdev, mlx->snd_cqh, mlx_tx_complete ); mlx_poll_cq ( netdev, mlx->rcv_cqh, mlx_rx_complete ); } @@ -365,28 +222,6 @@ static struct net_device_operations mlx_operations = { .irq = mlx_irq, }; -#if 0 -/************************************************************************** -DISABLE - Turn off ethernet interface -***************************************************************************/ -static void mt25218_disable(struct nic *nic) -{ - /* put the card in its initial state */ - /* This function serves 3 purposes. - * This disables DMA and interrupts so we don't receive - * unexpected packets or interrupts from the card after - * etherboot has finished. - * This frees resources so etherboot may use - * this driver on another interface - * This allows etherboot to reinitialize the interface - * if something is something goes wrong. - */ - if (nic || 1) { // ???? 
- disable_imp(); - } -} -#endif - /** * Remove PCI device * @@ -401,76 +236,6 @@ static void mlx_remove ( struct pci_device *pci ) { netdev_put ( netdev ); } -#if 0 -static struct nic_operations mt25218_operations = { - .connect = dummy_connect, - .poll = mt25218_poll, - .transmit = mt25218_transmit, - .irq = mt25218_irq, -}; - -/************************************************************************** -PROBE - Look for an adapter, this routine's visible to the outside -***************************************************************************/ - -static int mt25218_probe(struct nic *nic, struct pci_device *pci) -{ - int rc; - unsigned char user_request; - - if (pci->vendor != MELLANOX_VENDOR_ID) { - eprintf(""); - return 0; - } - - printf("\n"); - printf("Mellanox Technologies LTD - Boot over IB implementaion\n"); - printf("Build version = %s\n\n", build_revision); - - verbose_messages = 0; - print_info = 0; - printf("Press within 3 seconds:\n"); - printf("V - to increase verbosity\n"); - printf("I - to print information\n"); - if (prompt_key(3, &user_request)) { - if (user_request == 'V') { - printf("User selected verbose messages\n"); - verbose_messages = 1; - } - else if (user_request == 'I') { - printf("User selected to print information\n"); - print_info = 1; - } - } - printf("\n"); - - adjust_pci_device(pci); - - nic->priv_data = NULL; - rc = probe_imp(pci, nic); - - /* give the user a chance to look at the info */ - if (print_info) - sleep(5); - - if (!rc) { - /* store NIC parameters */ - nic->ioaddr = pci->ioaddr & ~3; - nic->irqno = pci->irq; - /* point to NIC specific routines */ - nic->nic_op = &mt25218_operations; - - uint8_t fixed_node_addr[ETH_ALEN] = { 0x00, 0x02, 0xc9, - 0x20, 0xf5, 0x95 }; - memcpy ( nic->node_addr, fixed_node_addr, ETH_ALEN ); - - return 1; - } - /* else */ - return 0; -} -#endif - /** * Probe PCI device * @@ -525,8 +290,8 @@ static int mlx_probe ( struct pci_device *pci, } static struct pci_device_id mlx_nics[] = { - PCI_ROM(0x15b3, 0x6282, "MT25218", "MT25218 HCA driver"), - PCI_ROM(0x15b3, 0x6274, "MT25204", "MT25204 HCA driver"), + PCI_ROM ( 0x15b3, 0x6282, "MT25218", "MT25218 HCA driver" ), + PCI_ROM ( 0x15b3, 0x6274, "MT25204", "MT25204 HCA driver" ), }; struct pci_driver mlx_driver __pci_driver = { From 9d08b7c692aac5b1790555f8fc28ddb52ef28bb5 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Fri, 14 Sep 2007 20:29:44 +0100 Subject: [PATCH 19/84] Starting to introduce an Infiniband device abstraction --- src/drivers/net/mlx_ipoib/mt25218.c | 138 ++++++++++++++++++++++++++++ src/include/gpxe/infiniband.h | 63 +++++++++++++ 2 files changed, 201 insertions(+) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 3cbca49a..c6015fb2 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -72,6 +72,7 @@ static uint8_t ib_broadcast[IB_ALEN] = { 0xff, }; static int mlx_transmit ( struct net_device *netdev, struct io_buffer *iobuf ) { struct mlx_nic *mlx = netdev->priv; + ud_av_t av = iobuf->data; ud_send_wqe_t snd_wqe; int rc; @@ -222,6 +223,143 @@ static struct net_device_operations mlx_operations = { .irq = mlx_irq, }; + + +int ib_alloc_wqe ( struct ib_work_queue *wq, struct io_buffer *iobuf ) { + unsigned int wqe_idx; + unsigned int new_write_ptr; + + /* Allocate queue entry */ + wqe_idx = new_write_ptr = wq->write_ptr; + if ( wq->iobuf[wqe_idx] ) + return -ENOBUFS; + wq->iobuf[wqe_idx] = iobuf; + + /* Update write pointer */ + new_write_ptr++; + new_write_ptr &= ( 
wq->num_wqes - 1 ); + wq->write_ptr = new_write_ptr; + + return wqe_idx; +} + +static inline void ib_free_wqe ( struct ib_work_queue *wq, int wqe_idx ) { + assert ( wq->iobuf[wqe_idx] != NULL ); + wq->iobuf[wqe_idx] = NULL; +} + +static int mlx_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, + struct ib_address_vector *av, + struct ib_queue_pair *qp ) { + struct mlx *mlx = ibdev->priv; + struct ib_work_queue *wq = &qp->send; + struct mlx_work_queue *mlx_wq = wq->priv; + unsigned int wqe_idx_mask = ( wq->num_wqes - 1 ); + unsigned int prev_wqe_idx; + struct ud_send_wqe_st *prev_wqe; + unsigned int wqe_idx; + struct ud_send_wqe_st *wqe; + struct ib_gid *gid; + size_t nds; + struct send_doorbell_st doorbell; + + /* Allocate work queue entry */ + prev_wqe_idx = wq->posted; + wqe_idx = ( prev_wqe_index + 1 ); + if ( wq->iobuf[wqe_idx & wqe_idx_mask] ) { + DBGC ( mlx, "MLX %p send queue full", mlx ); + return -ENOBUFS; + } + prev_wqe = &mlx_wq->wqe[prev_wqe_idx & wqe_idx_mask]; + wqe = &mlx_wq->wqe[wqe_idx & wqe_idx_mask]; + + /* Construct work queue entry */ + memset ( &wqe->next.control, 0, + sizeof ( wqe->next.control ) ); + MLX_POPULATE_1 ( &wqe->next.control, + arbelprm_wqe_segment_ctrl_send_st, 0, + always1, 1 ); + memset ( &wqe->udseg, 0, sizeof ( wqe->udseg ) ); + MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 0, + pd, GLOBAL_PD, + port_number, mlx->port ); + MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 1, + rlid, av->remote_lid, + g, av->gid_present ); + MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 2, + max_stat_rate, ( ( av->rate >= 3 ) ? 0 : 1 ), + msg, 3 ); + MLX_POPULATE_1 ( &wqe->udseg, arbelprm_ud_address_vector_st, 3, + sl, av->sl ); + gid = ( av->gid_present ? av->gid : &ib_no_gid ); + memcpy ( ( ( ( void * ) &wqe->udseg ) + 16 ), + gid, sizeof ( *gid ) ); + MLX_POPULATE_1 ( &wqe->udseg, arbelprm_wqe_segment_ud_st, 8, + destination_qp, av->dest_qp ); + MLX_POPULATE_1 ( &wqe->udseg, arbelprm_wqe_segment_ud_st, 9, + q_key, av->qkey ); + wqe->mpointer[0].local_addr_l = + cpu_to_be32 ( virt_to_bus ( iobuf->data ) ); + wqe->mpointer[0].byte_count = cpu_to_be32 ( iob_len ( iobuf ) ); + + /* Update previous work queue entry's "next" field */ + nds = ( offsetof ( typeof ( *wqe ), mpointer ) + + sizeof ( wqe->mpointer[0] ) ); + MLX_MODIFY_1 ( &prev_wqe->next.next, arbelprm_wqe_segment_next_st, 0, + nopcode, XDEV_NOPCODE_SEND ); + MLX_POPULATE_3 ( &prev_wqe->next.next, arbelprm_wqe_segment_next_st, 1, + nds, nds, + f, 1, + always1, 1 ); + + /* Ring doorbell */ + + doorbell index is a property of the queue pair + + + MLX_POPULATE_1 ( mlx_wq->send_uar_context, arbelprm_qp_db_record_st, 0, + counter, ( wqe_idx & 0xffff ) ); + memset ( &doorbell, 0, sizeof ( doorbell ) ); + MLX_POPULATE_4 ( &doorbell, arbelprm_send_doorbell_st, 0, + nopcode, XDEV_NOPCODE_SEND, + f, 1, + wqe_counter, ( prev_wqe_idx & 0xffff ), + wqe_cnt, 1 ); + MLX_POPULATE_2 ( &doorbell, arbelprm_send_doorbell_st, 1, + nds, nds, + qpn, qp->qpn ); + barrier(); + + wq->posted = wqe_idx; + + + struct mlx_nic *mlx = netdev->priv; + ud_av_t av = iobuf->data; + ud_send_wqe_t snd_wqe; + int rc; + + snd_wqe = alloc_send_wqe ( mlx->ipoib_qph ); + if ( ! 
snd_wqe ) { + DBGC ( mlx, "MLX %p out of TX WQEs\n", mlx ); + return -ENOBUFS; + } + + prep_send_wqe_buf ( mlx->ipoib_qph, mlx->bcast_av, snd_wqe, + iobuf->data, 0, iob_len ( iobuf ), 0 ); + if ( ( rc = post_send_req ( mlx->ipoib_qph, snd_wqe, 1 ) ) != 0 ) { + DBGC ( mlx, "MLX %p could not post TX WQE %p: %s\n", + mlx, snd_wqe, strerror ( rc ) ); + free_wqe ( snd_wqe ); + return rc; + } + + +} + +static struct ib_device_operations mlx_ib_operations = { + .post_send = mlx_post_send, +}; + /** * Remove PCI device * diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 9f126b49..22a8a982 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -61,6 +61,69 @@ struct ibhdr { uint16_t reserved; } __attribute__ (( packed )); +/** An Infiniband Work Queue */ +struct ib_work_queue { + /** Number of work queue entries */ + unsigned int num_wqes; + /** Posted index + * + * This is the index of the most recently posted entry. + */ + unsigned int posted; + /** Driver-private data + * + * Typically used to hold the address of the work queue. + */ + void *priv; + /** I/O buffers assigned to work queue */ + struct io_buffer *iobuf[0]; +}; + +/** An Infiniband Queue Pair */ +struct ib_queue_pair { + /** Queue Pair Number */ + uint32_t qpn; + /** Send queue */ + struct ib_work_queue send; + /** Receive queue */ + struct ib_work_queue recv; +}; + +/** An Infiniband Address Vector */ +struct ib_address_vector { + +}; + +/** + * Infiniband device operations + * + * These represent a subset of the Infiniband Verbs. + */ +struct ib_device_operations { + /** Post Send work queue entry + * + * @v ibdev Infiniband device + * @v iobuf I/O buffer + * @v av Address vector + * @v qp Queue pair + * @ret rc Return status code + * + * If this method returns success, the I/O buffer remains + * owned by the queue pair. If this method returns failure, + * the I/O buffer is immediately released; the failure is + * interpreted as "failure to enqueue buffer". + */ + int ( * post_send ) ( struct ib_device *ibdev, + struct io_buffer *iobuf, + struct ib_address_vector *av, + struct ib_queue_pair *qp ); +}; + + + + + + extern struct ll_protocol infiniband_protocol; extern const char * ib_ntoa ( const void *ll_addr ); From 38a73b55c445ffa596d4b4ecc2aef6476d00a3c1 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 15 Sep 2007 00:19:38 +0100 Subject: [PATCH 20/84] Now at least compiles --- src/drivers/net/mlx_ipoib/bit_ops.h | 98 +++++++++++++++++++++++++++ src/drivers/net/mlx_ipoib/mt25218.c | 100 ++++++++++++---------------- src/drivers/net/mlx_ipoib/mt25218.h | 18 +++++ src/include/gpxe/infiniband.h | 33 +++++++-- 4 files changed, 185 insertions(+), 64 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/bit_ops.h b/src/drivers/net/mlx_ipoib/bit_ops.h index b67f92ce..74823a60 100644 --- a/src/drivers/net/mlx_ipoib/bit_ops.h +++ b/src/drivers/net/mlx_ipoib/bit_ops.h @@ -133,4 +133,102 @@ struct addr_64_st { field; \ }) + + +/* Remaining code Copyright Fen Systems Ltd. 
2007 */ + +/** Bit offset of a field within a pseudo_bit_t structure */ +#define MLX_BIT_OFFSET( _structure, _field ) \ + offsetof ( struct _structure, _field ) + +/** Bit width of a field within a pseudo_bit_t structure */ +#define MLX_BIT_WIDTH( _structure, _field ) \ + sizeof ( ( ( struct _structure * ) NULL )->_field ) + +/* + * Assemble native-endian dword from named fields and values + * + */ + +#define MLX_ASSEMBLE_1( _structure, _index, _field, _value ) \ + ( (_value) << \ + ( MLX_BIT_OFFSET ( _structure, _field ) - ( 32 * (_index) ) ) ) + +#define MLX_ASSEMBLE_2( _structure, _index, _field, _value, ... ) \ + ( MLX_ASSEMBLE_1 ( _structure, _index, _field, _value ) | \ + MLX_ASSEMBLE_1 ( _structure, _index, __VA_ARGS__ ) ) + +#define MLX_ASSEMBLE_3( _structure, _index, _field, _value, ... ) \ + ( MLX_ASSEMBLE_1 ( _structure, _index, _field, _value ) | \ + MLX_ASSEMBLE_2 ( _structure, _index, __VA_ARGS__ ) ) + +#define MLX_ASSEMBLE_4( _structure, _index, _field, _value, ... ) \ + ( MLX_ASSEMBLE_1 ( _structure, _index, _field, _value ) | \ + MLX_ASSEMBLE_3 ( _structure, _index, __VA_ARGS__ ) ) + +/* + * Build native-endian (positive) dword bitmasks from named fields + * + */ + +#define MLX_MASK_1( _structure, _index, _field ) \ + MLX_ASSEMBLE_1 ( _structure, _index, _field, \ + ( ( 1 << MLX_BIT_WIDTH ( _structure, \ + _field ) ) - 1 ) ) + +#define MLX_MASK_2( _structure, _index, _field, ... ) \ + ( MLX_MASK_1 ( _structure, _index, _field ) | \ + MLX_MASK_1 ( _structure, _index, __VA_ARGS__ ) ) + +#define MLX_MASK_3( _structure, _index, _field, ... ) \ + ( MLX_MASK_1 ( _structure, _index, _field ) | \ + MLX_MASK_2 ( _structure, _index, __VA_ARGS__ ) ) + +#define MLX_MASK_4( _structure, _index, _field, ... ) \ + ( MLX_MASK_1 ( _structure, _index, _field ) | \ + MLX_MASK_3 ( _structure, _index, __VA_ARGS__ ) ) + +/* + * Populate big-endian dwords from named fields and values + * + */ + +#define MLX_POPULATE( _base, _index, _assembled ) \ + do { \ + uint32_t *__ptr = ( ( (uint32_t *) (_base) ) + (_index) ); \ + uint32_t __assembled = (_assembled); \ + *__ptr = cpu_to_be32 ( __assembled ); \ + } while ( 0 ) + +#define MLX_POPULATE_1( _base, _structure, _index, ... ) \ + MLX_POPULATE ( _base, _index, \ + MLX_ASSEMBLE_1 ( _structure, _index, __VA_ARGS__ ) ) + +#define MLX_POPULATE_2( _base, _structure, _index, ... ) \ + MLX_POPULATE ( _base, _index, \ + MLX_ASSEMBLE_2 ( _structure, _index, __VA_ARGS__ ) ) + +#define MLX_POPULATE_3( _base, _structure, _index, ... ) \ + MLX_POPULATE ( _base, _index, \ + MLX_ASSEMBLE_3 ( _structure, _index, __VA_ARGS__ ) ) + +#define MLX_POPULATE_4( _base, _structure, _index, ... 
) \ + MLX_POPULATE ( _base, _index, \ + MLX_ASSEMBLE_4 ( _structure, _index, __VA_ARGS__ ) ) + +/* + * Modify big-endian dword using named field and value + * + */ + +#define MLX_MODIFY( _base, _structure, _index, _field, _value ) \ + do { \ + uint32_t *__ptr = ( ( (uint32_t *) (_base) ) + (_index) ); \ + uint32_t __value = be32_to_cpu ( *__ptr ); \ + __value &= ~( MLX_MASK_1 ( _structure, _index, _field ) ); \ + __value |= MLX_ASSEMBLE_1 ( _structure, _index, \ + _field, _value ); \ + *__ptr = cpu_to_be32 ( __value ); \ + } while ( 0 ) + #endif /* __bit_ops_h__ */ diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index c6015fb2..e8290bb6 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -224,28 +224,32 @@ static struct net_device_operations mlx_operations = { }; +struct mlx_send_work_queue { + /** Doorbell number */ + unsigned int doorbell_idx; + /** Work queue entries */ + struct ud_send_wqe_st *wqe; +}; -int ib_alloc_wqe ( struct ib_work_queue *wq, struct io_buffer *iobuf ) { - unsigned int wqe_idx; - unsigned int new_write_ptr; +struct mlx { + /** User Access Region */ + unsigned long uar; + /** Doorbell records */ + union db_record_st *db_rec; +}; - /* Allocate queue entry */ - wqe_idx = new_write_ptr = wq->write_ptr; - if ( wq->iobuf[wqe_idx] ) - return -ENOBUFS; - wq->iobuf[wqe_idx] = iobuf; +static struct ib_gid mlx_no_gid = { + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } +}; - /* Update write pointer */ - new_write_ptr++; - new_write_ptr &= ( wq->num_wqes - 1 ); - wq->write_ptr = new_write_ptr; +static void mlx_ring_doorbell ( struct mlx *mlx, void *db_reg, + unsigned int offset ) { + uint32_t *db_reg_dword = db_reg; - return wqe_idx; -} - -static inline void ib_free_wqe ( struct ib_work_queue *wq, int wqe_idx ) { - assert ( wq->iobuf[wqe_idx] != NULL ); - wq->iobuf[wqe_idx] = NULL; + barrier(); + writel ( db_reg_dword[0], ( mlx->uar + offset + 0 ) ); + barrier(); + writel ( db_reg_dword[1], ( mlx->uar + offset + 4 ) ); } static int mlx_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, @@ -253,7 +257,7 @@ static int mlx_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, struct ib_queue_pair *qp ) { struct mlx *mlx = ibdev->priv; struct ib_work_queue *wq = &qp->send; - struct mlx_work_queue *mlx_wq = wq->priv; + struct mlx_send_work_queue *mlx_wq = wq->priv; unsigned int wqe_idx_mask = ( wq->num_wqes - 1 ); unsigned int prev_wqe_idx; struct ud_send_wqe_st *prev_wqe; @@ -261,11 +265,12 @@ static int mlx_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, struct ud_send_wqe_st *wqe; struct ib_gid *gid; size_t nds; - struct send_doorbell_st doorbell; + union db_record_st *db_rec; + struct send_doorbell_st db_reg; /* Allocate work queue entry */ prev_wqe_idx = wq->posted; - wqe_idx = ( prev_wqe_index + 1 ); + wqe_idx = ( prev_wqe_idx + 1 ); if ( wq->iobuf[wqe_idx & wqe_idx_mask] ) { DBGC ( mlx, "MLX %p send queue full", mlx ); return -ENOBUFS; @@ -282,16 +287,16 @@ static int mlx_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, memset ( &wqe->udseg, 0, sizeof ( wqe->udseg ) ); MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 0, pd, GLOBAL_PD, - port_number, mlx->port ); + port_number, PXE_IB_PORT ); MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 1, - rlid, av->remote_lid, + rlid, av->dlid, g, av->gid_present ); MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 2, max_stat_rate, ( ( av->rate >= 3 ) ? 
0 : 1 ), msg, 3 ); MLX_POPULATE_1 ( &wqe->udseg, arbelprm_ud_address_vector_st, 3, sl, av->sl ); - gid = ( av->gid_present ? av->gid : &ib_no_gid ); + gid = ( av->gid_present ? &av->gid : &mlx_no_gid ); memcpy ( ( ( ( void * ) &wqe->udseg ) + 16 ), gid, sizeof ( *gid ) ); MLX_POPULATE_1 ( &wqe->udseg, arbelprm_wqe_segment_ud_st, 8, @@ -305,55 +310,34 @@ static int mlx_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, /* Update previous work queue entry's "next" field */ nds = ( offsetof ( typeof ( *wqe ), mpointer ) + sizeof ( wqe->mpointer[0] ) ); - MLX_MODIFY_1 ( &prev_wqe->next.next, arbelprm_wqe_segment_next_st, 0, - nopcode, XDEV_NOPCODE_SEND ); + MLX_MODIFY ( &prev_wqe->next.next, arbelprm_wqe_segment_next_st, 0, + nopcode, XDEV_NOPCODE_SEND ); MLX_POPULATE_3 ( &prev_wqe->next.next, arbelprm_wqe_segment_next_st, 1, nds, nds, f, 1, always1, 1 ); - /* Ring doorbell */ - - doorbell index is a property of the queue pair - - - MLX_POPULATE_1 ( mlx_wq->send_uar_context, arbelprm_qp_db_record_st, 0, + /* Update doorbell record */ + db_rec = &mlx->db_rec[mlx_wq->doorbell_idx]; + MLX_POPULATE_1 ( db_rec, arbelprm_qp_db_record_st, 0, counter, ( wqe_idx & 0xffff ) ); - memset ( &doorbell, 0, sizeof ( doorbell ) ); - MLX_POPULATE_4 ( &doorbell, arbelprm_send_doorbell_st, 0, + barrier(); + + /* Ring doorbell register */ + MLX_POPULATE_4 ( &db_reg, arbelprm_send_doorbell_st, 0, nopcode, XDEV_NOPCODE_SEND, f, 1, wqe_counter, ( prev_wqe_idx & 0xffff ), wqe_cnt, 1 ); - MLX_POPULATE_2 ( &doorbell, arbelprm_send_doorbell_st, 1, + MLX_POPULATE_2 ( &db_reg, arbelprm_send_doorbell_st, 1, nds, nds, qpn, qp->qpn ); - barrier(); + mlx_ring_doorbell ( mlx, &db_reg, POST_SND_OFFSET ); + /* Update work queue's posted index */ wq->posted = wqe_idx; - - struct mlx_nic *mlx = netdev->priv; - ud_av_t av = iobuf->data; - ud_send_wqe_t snd_wqe; - int rc; - - snd_wqe = alloc_send_wqe ( mlx->ipoib_qph ); - if ( ! 
snd_wqe ) { - DBGC ( mlx, "MLX %p out of TX WQEs\n", mlx ); - return -ENOBUFS; - } - - prep_send_wqe_buf ( mlx->ipoib_qph, mlx->bcast_av, snd_wqe, - iobuf->data, 0, iob_len ( iobuf ), 0 ); - if ( ( rc = post_send_req ( mlx->ipoib_qph, snd_wqe, 1 ) ) != 0 ) { - DBGC ( mlx, "MLX %p could not post TX WQE %p: %s\n", - mlx, snd_wqe, strerror ( rc ) ); - free_wqe ( snd_wqe ); - return rc; - } - - + return 0; } static struct ib_device_operations mlx_ib_operations = { diff --git a/src/drivers/net/mlx_ipoib/mt25218.h b/src/drivers/net/mlx_ipoib/mt25218.h index 1e7c8d8b..590d72f6 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.h +++ b/src/drivers/net/mlx_ipoib/mt25218.h @@ -342,6 +342,24 @@ struct cq_dbell_st { __u8 raw[MT_STRUCT_SIZE(arbelprm_cq_cmd_doorbell_st)]; } __attribute__ ((packed)); +struct qp_db_record_st { + __u8 raw[MT_STRUCT_SIZE(arbelprm_qp_db_record_st)]; +} __attribute__ ((packed)); + +struct cq_arm_db_record_st { + __u8 raw[MT_STRUCT_SIZE(arbelprm_cq_arm_db_record_st)]; +} __attribute__ ((packed)); + +struct cq_ci_db_record_st { + __u8 raw[MT_STRUCT_SIZE(arbelprm_cq_ci_db_record_st)]; +} __attribute__ ((packed)); + +union db_record_st { + struct qp_db_record_st qp; + struct cq_arm_db_record_st cq_arm; + struct cq_ci_db_record_st cq_ci; +} __attribute__ ((packed)); + struct mad_ifc_inprm_st { union mad_u mad; } __attribute__ ((packed)); diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 22a8a982..ccb6e49e 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -61,6 +61,9 @@ struct ibhdr { uint16_t reserved; } __attribute__ (( packed )); + + + /** An Infiniband Work Queue */ struct ib_work_queue { /** Number of work queue entries */ @@ -70,10 +73,7 @@ struct ib_work_queue { * This is the index of the most recently posted entry. */ unsigned int posted; - /** Driver-private data - * - * Typically used to hold the address of the work queue. - */ + /** Driver private data */ void *priv; /** I/O buffers assigned to work queue */ struct io_buffer *iobuf[0]; @@ -87,13 +87,30 @@ struct ib_queue_pair { struct ib_work_queue send; /** Receive queue */ struct ib_work_queue recv; + /** Driver private data */ + void *priv; }; /** An Infiniband Address Vector */ struct ib_address_vector { - + /** Destination Queue Pair */ + unsigned int dest_qp; + /** Queue key */ + unsigned int qkey; + /** Destination Local ID */ + unsigned int dlid; + /** Rate */ + unsigned int rate; + /** Service level */ + unsigned int sl; + /** GID is present */ + unsigned int gid_present; + /** GID */ + struct ib_gid gid; }; +struct ib_device; + /** * Infiniband device operations * @@ -119,7 +136,11 @@ struct ib_device_operations { struct ib_queue_pair *qp ); }; - +/** An Infiniband device */ +struct ib_device { + /** Driver private data */ + void *priv; +}; From 21d4ab3ce2a5a4041f4b5ff81f992aeb3735065e Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 15 Sep 2007 00:27:09 +0100 Subject: [PATCH 21/84] Prefix arbel-specific functions etc. 
with arbel_ --- src/drivers/net/mlx_ipoib/mt25218.c | 85 +++++++++++++++-------------- 1 file changed, 44 insertions(+), 41 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index e8290bb6..e8fcbb40 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -23,6 +23,22 @@ Skeleton NIC driver for Etherboot #include "mt25218_imp.c" +struct arbel_send_work_queue { + /** Doorbell number */ + unsigned int doorbell_idx; + /** Work queue entries */ + struct ud_send_wqe_st *wqe; +}; + +struct arbel { + /** User Access Region */ + unsigned long uar; + /** Doorbell records */ + union db_record_st *db_rec; +}; + + + struct mlx_nic { /** Queue pair handle */ udqp_t ipoib_qph; @@ -224,44 +240,31 @@ static struct net_device_operations mlx_operations = { }; -struct mlx_send_work_queue { - /** Doorbell number */ - unsigned int doorbell_idx; - /** Work queue entries */ - struct ud_send_wqe_st *wqe; -}; -struct mlx { - /** User Access Region */ - unsigned long uar; - /** Doorbell records */ - union db_record_st *db_rec; -}; - -static struct ib_gid mlx_no_gid = { +static struct ib_gid arbel_no_gid = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } }; -static void mlx_ring_doorbell ( struct mlx *mlx, void *db_reg, - unsigned int offset ) { +static void arbel_ring_doorbell ( struct arbel *arbel, void *db_reg, + unsigned int offset ) { uint32_t *db_reg_dword = db_reg; barrier(); - writel ( db_reg_dword[0], ( mlx->uar + offset + 0 ) ); + writel ( db_reg_dword[0], ( arbel->uar + offset + 0 ) ); barrier(); - writel ( db_reg_dword[1], ( mlx->uar + offset + 4 ) ); + writel ( db_reg_dword[1], ( arbel->uar + offset + 4 ) ); } -static int mlx_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, - struct ib_address_vector *av, - struct ib_queue_pair *qp ) { - struct mlx *mlx = ibdev->priv; +static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, + struct ib_address_vector *av, + struct ib_queue_pair *qp ) { + struct arbel *arbel = ibdev->priv; struct ib_work_queue *wq = &qp->send; - struct mlx_send_work_queue *mlx_wq = wq->priv; + struct arbel_send_work_queue *arbel_wq = wq->priv; unsigned int wqe_idx_mask = ( wq->num_wqes - 1 ); unsigned int prev_wqe_idx; - struct ud_send_wqe_st *prev_wqe; unsigned int wqe_idx; + struct ud_send_wqe_st *prev_wqe; struct ud_send_wqe_st *wqe; struct ib_gid *gid; size_t nds; @@ -272,11 +275,11 @@ static int mlx_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, prev_wqe_idx = wq->posted; wqe_idx = ( prev_wqe_idx + 1 ); if ( wq->iobuf[wqe_idx & wqe_idx_mask] ) { - DBGC ( mlx, "MLX %p send queue full", mlx ); + DBGC ( arbel, "ARBEL %p send queue full", arbel ); return -ENOBUFS; } - prev_wqe = &mlx_wq->wqe[prev_wqe_idx & wqe_idx_mask]; - wqe = &mlx_wq->wqe[wqe_idx & wqe_idx_mask]; + prev_wqe = &arbel_wq->wqe[prev_wqe_idx & wqe_idx_mask]; + wqe = &arbel_wq->wqe[wqe_idx & wqe_idx_mask]; /* Construct work queue entry */ memset ( &wqe->next.control, 0, @@ -296,7 +299,7 @@ static int mlx_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, msg, 3 ); MLX_POPULATE_1 ( &wqe->udseg, arbelprm_ud_address_vector_st, 3, sl, av->sl ); - gid = ( av->gid_present ? &av->gid : &mlx_no_gid ); + gid = ( av->gid_present ? 
&av->gid : &arbel_no_gid ); memcpy ( ( ( ( void * ) &wqe->udseg ) + 16 ), gid, sizeof ( *gid ) ); MLX_POPULATE_1 ( &wqe->udseg, arbelprm_wqe_segment_ud_st, 8, @@ -318,7 +321,7 @@ static int mlx_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, always1, 1 ); /* Update doorbell record */ - db_rec = &mlx->db_rec[mlx_wq->doorbell_idx]; + db_rec = &arbel->db_rec[arbel_wq->doorbell_idx]; MLX_POPULATE_1 ( db_rec, arbelprm_qp_db_record_st, 0, counter, ( wqe_idx & 0xffff ) ); barrier(); @@ -332,7 +335,7 @@ static int mlx_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, MLX_POPULATE_2 ( &db_reg, arbelprm_send_doorbell_st, 1, nds, nds, qpn, qp->qpn ); - mlx_ring_doorbell ( mlx, &db_reg, POST_SND_OFFSET ); + arbel_ring_doorbell ( arbel, &db_reg, POST_SND_OFFSET ); /* Update work queue's posted index */ wq->posted = wqe_idx; @@ -340,8 +343,8 @@ static int mlx_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, return 0; } -static struct ib_device_operations mlx_ib_operations = { - .post_send = mlx_post_send, +static struct ib_device_operations arbel_ib_operations = { + .post_send = arbel_post_send, }; /** @@ -349,7 +352,7 @@ static struct ib_device_operations mlx_ib_operations = { * * @v pci PCI device */ -static void mlx_remove ( struct pci_device *pci ) { +static void arbel_remove ( struct pci_device *pci ) { struct net_device *netdev = pci_get_drvdata ( pci ); unregister_netdev ( netdev ); @@ -365,8 +368,8 @@ static void mlx_remove ( struct pci_device *pci ) { * @v id PCI ID * @ret rc Return status code */ -static int mlx_probe ( struct pci_device *pci, - const struct pci_device_id *id __unused ) { +static int arbel_probe ( struct pci_device *pci, + const struct pci_device_id *id __unused ) { struct net_device *netdev; struct mlx_nic *mlx; struct ib_mac *mac; @@ -411,14 +414,14 @@ static int mlx_probe ( struct pci_device *pci, return rc; } -static struct pci_device_id mlx_nics[] = { +static struct pci_device_id arbel_nics[] = { PCI_ROM ( 0x15b3, 0x6282, "MT25218", "MT25218 HCA driver" ), PCI_ROM ( 0x15b3, 0x6274, "MT25204", "MT25204 HCA driver" ), }; -struct pci_driver mlx_driver __pci_driver = { - .ids = mlx_nics, - .id_count = ( sizeof ( mlx_nics ) / sizeof ( mlx_nics[0] ) ), - .probe = mlx_probe, - .remove = mlx_remove, +struct pci_driver arbel_driver __pci_driver = { + .ids = arbel_nics, + .id_count = ( sizeof ( arbel_nics ) / sizeof ( arbel_nics[0] ) ), + .probe = arbel_probe, + .remove = arbel_remove, }; From 8b27da9de16675f59be082168de9468346ec7183 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 15 Sep 2007 01:35:07 +0100 Subject: [PATCH 22/84] Gets a response out of the hardware. (An error completion, to be precise.) 
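
The MLX_EXTRACT() added below derives everything it needs from the autogenerated
pseudo_bit_t descriptors, in which one array element stands for one hardware bit:
offsetof() gives the bit offset, sizeof() gives the bit width, and the field is
then read out of the matching big-endian dword. A minimal standalone sketch of
that arithmetic, not part of this patch (struct example_st and its field layout
are invented purely for illustration):

  #include <stdint.h>
  #include <stddef.h>
  #include <stdio.h>
  #include <arpa/inet.h>          /* ntohl() stands in for be32_to_cpu() */

  typedef unsigned char pseudo_bit_t;

  /* Invented descriptor: a 16-bit "rlid" at bit 32, a 4-bit "sl" at bit 48 */
  struct example_st {
      pseudo_bit_t reserved0[32];
      pseudo_bit_t rlid[16];
      pseudo_bit_t sl[4];
      pseudo_bit_t reserved1[12];
  };

  #define BIT_OFFSET( _s, _f )   offsetof ( struct _s, _f )
  #define BIT_WIDTH( _s, _f )    sizeof ( ( ( struct _s * ) NULL )->_f )
  #define DWORD_OFFSET( _s, _f ) ( BIT_OFFSET ( _s, _f ) / 32 )
  #define EXTRACT( _base, _s, _f )                                          \
      ( ( ntohl ( ( ( uint32_t * ) (_base) )[ DWORD_OFFSET ( _s, _f ) ] )   \
          >> ( BIT_OFFSET ( _s, _f ) - 32 * DWORD_OFFSET ( _s, _f ) ) )     \
        & ( ( 1 << BIT_WIDTH ( _s, _f ) ) - 1 ) )

  int main ( void ) {
      /* Dword 1 as the hardware presents it (big-endian):
       * rlid = 0x1234 in bits 0-15, sl = 5 in bits 16-19 */
      uint32_t raw[2] = { htonl ( 0 ), htonl ( 0x00051234 ) };

      printf ( "rlid = %#x, sl = %#x\n",
               ( unsigned int ) EXTRACT ( raw, example_st, rlid ),
               ( unsigned int ) EXTRACT ( raw, example_st, sl ) );
      return 0;
  }

Later patches in this series fold the descriptor type into a wrapper structure
so that call sites no longer have to spell it out.
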
--- src/drivers/net/mlx_ipoib/bit_ops.h | 41 +++++++++++++++--- src/drivers/net/mlx_ipoib/mt25218.c | 67 ++++++++++++++++++++++++++--- src/include/gpxe/infiniband.h | 4 +- 3 files changed, 98 insertions(+), 14 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/bit_ops.h b/src/drivers/net/mlx_ipoib/bit_ops.h index 74823a60..969de642 100644 --- a/src/drivers/net/mlx_ipoib/bit_ops.h +++ b/src/drivers/net/mlx_ipoib/bit_ops.h @@ -141,18 +141,33 @@ struct addr_64_st { #define MLX_BIT_OFFSET( _structure, _field ) \ offsetof ( struct _structure, _field ) +/** Dword offset of a field within a pseudo_bit_t structure */ +#define MLX_DWORD_OFFSET( _structure, _field ) \ + ( MLX_BIT_OFFSET ( _structure, _field ) / 32 ) + +/** Dword bit offset of a field within a pseudo_bit_t structure + * + * Yes, using mod-32 would work, but would lose the check for the + * error of specifying a mismatched field name and dword index. + */ +#define MLX_DWORD_BIT_OFFSET( _structure, _index, _field ) \ + ( MLX_BIT_OFFSET ( _structure, _field ) - ( 32 * (_index) ) ) + /** Bit width of a field within a pseudo_bit_t structure */ #define MLX_BIT_WIDTH( _structure, _field ) \ sizeof ( ( ( struct _structure * ) NULL )->_field ) +/** Bit mask for a field within a pseudo_bit_t structure */ +#define MLX_BIT_MASK( _structure, _field ) \ + ( ( 1 << MLX_BIT_WIDTH ( _structure, _field ) ) - 1 ) + /* * Assemble native-endian dword from named fields and values * */ #define MLX_ASSEMBLE_1( _structure, _index, _field, _value ) \ - ( (_value) << \ - ( MLX_BIT_OFFSET ( _structure, _field ) - ( 32 * (_index) ) ) ) + ( (_value) << MLX_DWORD_BIT_OFFSET ( _structure, _index, _field ) ) #define MLX_ASSEMBLE_2( _structure, _index, _field, _value, ... ) \ ( MLX_ASSEMBLE_1 ( _structure, _index, _field, _value ) | \ @@ -172,9 +187,8 @@ struct addr_64_st { */ #define MLX_MASK_1( _structure, _index, _field ) \ - MLX_ASSEMBLE_1 ( _structure, _index, _field, \ - ( ( 1 << MLX_BIT_WIDTH ( _structure, \ - _field ) ) - 1 ) ) + ( MLX_BIT_MASK ( _structure, _field ) << \ + MLX_DWORD_BIT_OFFSET ( _structure, _index, _field ) ) #define MLX_MASK_2( _structure, _index, _field, ... 
) \ ( MLX_MASK_1 ( _structure, _index, _field ) | \ @@ -231,4 +245,21 @@ struct addr_64_st { *__ptr = cpu_to_be32 ( __value ); \ } while ( 0 ) +/* + * Extract value of named field + * + */ + +#define MLX_EXTRACT( _base, _structure, _field ) \ + ( { \ + unsigned int __index = \ + MLX_DWORD_OFFSET ( _structure, _field ); \ + uint32_t *__ptr = ( ( (uint32_t *) (_base) ) + __index ); \ + uint32_t __value = be32_to_cpu ( *__ptr ); \ + __value >>= MLX_DWORD_BIT_OFFSET ( _structure, __index, \ + _field ); \ + __value &= MLX_BIT_MASK ( _structure, _field ); \ + __value; \ + } ) + #endif /* __bit_ops_h__ */ diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index e8fcbb40..601a1f40 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -27,12 +27,13 @@ struct arbel_send_work_queue { /** Doorbell number */ unsigned int doorbell_idx; /** Work queue entries */ - struct ud_send_wqe_st *wqe; + // struct ud_send_wqe_st *wqe; + union ud_send_wqe_u *wqe_u; }; struct arbel { /** User Access Region */ - unsigned long uar; + void *uar; /** Doorbell records */ union db_record_st *db_rec; }; @@ -88,7 +89,6 @@ static uint8_t ib_broadcast[IB_ALEN] = { 0xff, }; static int mlx_transmit ( struct net_device *netdev, struct io_buffer *iobuf ) { struct mlx_nic *mlx = netdev->priv; - ud_av_t av = iobuf->data; ud_send_wqe_t snd_wqe; int rc; @@ -110,6 +110,58 @@ static int mlx_transmit ( struct net_device *netdev, return 0; } +static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, + struct ib_address_vector *av, + struct ib_queue_pair *qp ); + +static struct io_buffer *tx_ring[NUM_IPOIB_SND_WQES]; +static int tx_posted = 0; + +static int mlx_transmit_direct ( struct net_device *netdev, + struct io_buffer *iobuf ) { + struct mlx_nic *mlx = netdev->priv; + int rc; + + struct arbel arbel = { + .uar = memfree_pci_dev.uar, + .db_rec = dev_ib_data.uar_context_base, + }; + struct arbel_send_work_queue arbel_send_queue = { + .doorbell_idx = IPOIB_SND_QP_DB_IDX, + .wqe_u = ( (struct udqp_st *) ipoib_data.ipoib_qph )->snd_wq, + }; + struct ib_device ibdev = { + .priv = &arbel, + }; + struct ib_queue_pair qp = { + .qpn = ib_get_qpn ( mlx->ipoib_qph ), + .send = { + .num_wqes = NUM_IPOIB_SND_WQES, + .posted = tx_posted, + .iobufs = tx_ring, + .priv = &arbel_send_queue, + }, + }; + struct ud_av_st *bcast_av = mlx->bcast_av; + struct address_vector_st *bav = &bcast_av->av; + struct ib_address_vector av = { + .dest_qp = bcast_av->dest_qp, + .qkey = bcast_av->qkey, + .dlid = MLX_EXTRACT ( bav, arbelprm_ud_address_vector_st, rlid ), + .rate = ( MLX_EXTRACT ( bav, arbelprm_ud_address_vector_st, max_stat_rate ) ? 
1 : 4 ), + .sl = MLX_EXTRACT ( bav, arbelprm_ud_address_vector_st, sl ), + .gid_present = 1, + }; + memcpy ( &av.gid, ( ( void * ) bav ) + 16, 16 ); + + rc = arbel_post_send ( &ibdev, iobuf, &av, &qp ); + + tx_posted = qp.send.posted; + + return rc; +} + + /** * Handle TX completion * @@ -234,7 +286,7 @@ static void mlx_irq ( struct net_device *netdev, int enable ) { static struct net_device_operations mlx_operations = { .open = mlx_open, .close = mlx_close, - .transmit = mlx_transmit, + .transmit = mlx_transmit_direct, .poll = mlx_poll, .irq = mlx_irq, }; @@ -274,12 +326,13 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, /* Allocate work queue entry */ prev_wqe_idx = wq->posted; wqe_idx = ( prev_wqe_idx + 1 ); - if ( wq->iobuf[wqe_idx & wqe_idx_mask] ) { + if ( wq->iobufs[wqe_idx & wqe_idx_mask] ) { DBGC ( arbel, "ARBEL %p send queue full", arbel ); return -ENOBUFS; } - prev_wqe = &arbel_wq->wqe[prev_wqe_idx & wqe_idx_mask]; - wqe = &arbel_wq->wqe[wqe_idx & wqe_idx_mask]; + wq->iobufs[wqe_idx & wqe_idx_mask] = iobuf; + prev_wqe = &arbel_wq->wqe_u[prev_wqe_idx & wqe_idx_mask].wqe_cont.wqe; + wqe = &arbel_wq->wqe_u[wqe_idx & wqe_idx_mask].wqe_cont.wqe; /* Construct work queue entry */ memset ( &wqe->next.control, 0, diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index ccb6e49e..8b3a2f7c 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -73,10 +73,10 @@ struct ib_work_queue { * This is the index of the most recently posted entry. */ unsigned int posted; + /** I/O buffers assigned to work queue */ + struct io_buffer **iobufs; /** Driver private data */ void *priv; - /** I/O buffers assigned to work queue */ - struct io_buffer *iobuf[0]; }; /** An Infiniband Queue Pair */ From 970951666fe478399b79bdc4b66c015b06fff563 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 15 Sep 2007 02:53:05 +0100 Subject: [PATCH 23/84] arbel_post_send() has been observed to transmit a packet! 
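
The reworked send path below replaces the posted/wqe_idx pair with a single
free-running counter, wq->next_idx: the counter is masked with (num_wqes - 1)
to find the array slot, a slot that still holds an I/O buffer means the queue
is full, and the previously built entry is simply (next_idx - 1) under the same
mask. A simplified standalone model of that convention, not part of this patch
(the ring[] array and the post()/complete() helpers are invented for
illustration):

  #include <stdio.h>

  #define NUM_WQES 8                  /* must be a power of two */

  static void *ring[NUM_WQES];        /* stand-in for wq->iobufs[] */
  static unsigned long next_idx;      /* stand-in for wq->next_idx */
  static unsigned long done_idx;      /* oldest not-yet-completed entry */

  static int post ( void *buf ) {
      unsigned int mask = ( NUM_WQES - 1 );

      if ( ring[next_idx & mask] != NULL )
          return -1;                  /* queue full (-ENOBUFS) */
      ring[next_idx & mask] = buf;    /* claim the slot */
      /* the real code links &wqe[( next_idx - 1 ) & mask] to this entry */
      next_idx++;                     /* counter never wraps to the array */
      return 0;
  }

  static void complete ( void ) {
      /* a completion frees the oldest occupied slot */
      ring[done_idx++ & ( NUM_WQES - 1 )] = NULL;
  }

  int main ( void ) {
      int i, posted = 0;

      for ( i = 0 ; i < 10 ; i++ )
          posted += ( post ( &i ) == 0 );
      printf ( "posted %d of 10 before the ring filled\n", posted );
      complete();
      printf ( "after one completion a post %s\n",
               ( post ( &i ) == 0 ) ? "succeeds" : "fails" );
      return 0;
  }
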
--- src/drivers/net/mlx_ipoib/ib_mt25218.c | 12 ++++++ src/drivers/net/mlx_ipoib/mt25218.c | 60 +++++++++++++++++--------- src/include/gpxe/infiniband.h | 7 +-- 3 files changed, 56 insertions(+), 23 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/ib_mt25218.c b/src/drivers/net/mlx_ipoib/ib_mt25218.c index dcd49e45..8122a20e 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt25218.c +++ b/src/drivers/net/mlx_ipoib/ib_mt25218.c @@ -1122,9 +1122,14 @@ static int post_send_req(void *qph, void *wqeh, __u8 num_gather) struct send_doorbell_st dbell; __u32 nds; + DBG ( "Work queue entry:\n" ); + DBG_HD ( snd_wqe, sizeof ( *snd_wqe ) ); + qp->post_send_counter++; WRITE_WORD_VOL(qp->send_uar_context, 2, htons(qp->post_send_counter)); + DBG ( "Doorbell record:\n" ); + DBG_HD ( qp->send_uar_context, 8 ); memset(&dbell, 0, sizeof dbell); INS_FLD(XDEV_NOPCODE_SEND, &dbell, arbelprm_send_doorbell_st, nopcode); @@ -1148,6 +1153,10 @@ static int post_send_req(void *qph, void *wqeh, __u8 num_gather) INS_FLD_TO_BE(XDEV_NOPCODE_SEND, &qp->last_posted_snd_wqe->next.next, arbelprm_wqe_segment_next_st, nopcode); + + DBG ( "Previous work queue entry's next field:\n" ); + DBG_HD ( &qp->last_posted_snd_wqe->next.next, + sizeof ( qp->last_posted_snd_wqe->next.next ) ); } rc = cmd_post_doorbell(&dbell, POST_SND_OFFSET); @@ -1965,6 +1974,9 @@ static void dev_post_dbell(void *dbell, __u32 offset) address = (unsigned long)(memfree_pci_dev.uar) + offset; tprintf("va=0x%lx pa=0x%lx", address, virt_to_bus((const void *)address)); + DBG ( "dev_post_dbell %08lx:%08lx to %lx\n", + htonl ( ptr[0] ), htonl ( ptr[1] ), + virt_to_phys ( memfree_pci_dev.uar + offset ) ); writel(htonl(ptr[0]), memfree_pci_dev.uar + offset); barrier(); address += 4; diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 601a1f40..26e02cd0 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -115,7 +115,7 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, struct ib_queue_pair *qp ); static struct io_buffer *tx_ring[NUM_IPOIB_SND_WQES]; -static int tx_posted = 0; +static int next_tx_idx = 0; static int mlx_transmit_direct ( struct net_device *netdev, struct io_buffer *iobuf ) { @@ -128,7 +128,7 @@ static int mlx_transmit_direct ( struct net_device *netdev, }; struct arbel_send_work_queue arbel_send_queue = { .doorbell_idx = IPOIB_SND_QP_DB_IDX, - .wqe_u = ( (struct udqp_st *) ipoib_data.ipoib_qph )->snd_wq, + .wqe_u = ( (struct udqp_st *) mlx->ipoib_qph )->snd_wq, }; struct ib_device ibdev = { .priv = &arbel, @@ -137,7 +137,7 @@ static int mlx_transmit_direct ( struct net_device *netdev, .qpn = ib_get_qpn ( mlx->ipoib_qph ), .send = { .num_wqes = NUM_IPOIB_SND_WQES, - .posted = tx_posted, + .next_idx = next_tx_idx, .iobufs = tx_ring, .priv = &arbel_send_queue, }, @@ -156,7 +156,7 @@ static int mlx_transmit_direct ( struct net_device *netdev, rc = arbel_post_send ( &ibdev, iobuf, &av, &qp ); - tx_posted = qp.send.posted; + next_tx_idx = qp.send.next_idx; return rc; } @@ -286,7 +286,11 @@ static void mlx_irq ( struct net_device *netdev, int enable ) { static struct net_device_operations mlx_operations = { .open = mlx_open, .close = mlx_close, +#if 0 + .transmit = mlx_transmit, +#else .transmit = mlx_transmit_direct, +#endif .poll = mlx_poll, .irq = mlx_irq, }; @@ -301,6 +305,10 @@ static void arbel_ring_doorbell ( struct arbel *arbel, void *db_reg, unsigned int offset ) { uint32_t *db_reg_dword = db_reg; + DBG ( "arbel_ring_doorbell %08lx:%08lx to 
%lx\n", + db_reg_dword[0], db_reg_dword[1], + virt_to_phys ( arbel->uar + offset ) ); + barrier(); writel ( db_reg_dword[0], ( arbel->uar + offset + 0 ) ); barrier(); @@ -314,8 +322,6 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, struct ib_work_queue *wq = &qp->send; struct arbel_send_work_queue *arbel_wq = wq->priv; unsigned int wqe_idx_mask = ( wq->num_wqes - 1 ); - unsigned int prev_wqe_idx; - unsigned int wqe_idx; struct ud_send_wqe_st *prev_wqe; struct ud_send_wqe_st *wqe; struct ib_gid *gid; @@ -324,17 +330,17 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, struct send_doorbell_st db_reg; /* Allocate work queue entry */ - prev_wqe_idx = wq->posted; - wqe_idx = ( prev_wqe_idx + 1 ); - if ( wq->iobufs[wqe_idx & wqe_idx_mask] ) { + if ( wq->iobufs[wq->next_idx & wqe_idx_mask] ) { DBGC ( arbel, "ARBEL %p send queue full", arbel ); return -ENOBUFS; } - wq->iobufs[wqe_idx & wqe_idx_mask] = iobuf; - prev_wqe = &arbel_wq->wqe_u[prev_wqe_idx & wqe_idx_mask].wqe_cont.wqe; - wqe = &arbel_wq->wqe_u[wqe_idx & wqe_idx_mask].wqe_cont.wqe; + wq->iobufs[wq->next_idx & wqe_idx_mask] = iobuf; + prev_wqe = &arbel_wq->wqe_u[(wq->next_idx - 1) & wqe_idx_mask].wqe_cont.wqe; + wqe = &arbel_wq->wqe_u[wq->next_idx & wqe_idx_mask].wqe_cont.wqe; /* Construct work queue entry */ + MLX_POPULATE_1 ( &wqe->next.next, arbelprm_wqe_segment_next_st, 1, + always1, 1 ); memset ( &wqe->next.control, 0, sizeof ( wqe->next.control ) ); MLX_POPULATE_1 ( &wqe->next.control, @@ -359,13 +365,22 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, destination_qp, av->dest_qp ); MLX_POPULATE_1 ( &wqe->udseg, arbelprm_wqe_segment_ud_st, 9, q_key, av->qkey ); - wqe->mpointer[0].local_addr_l = - cpu_to_be32 ( virt_to_bus ( iobuf->data ) ); + + // wqe->mpointer[0].local_addr_l = + // cpu_to_be32 ( virt_to_bus ( iobuf->data ) ); + + memcpy ( bus_to_virt ( be32_to_cpu ( wqe->mpointer[0].local_addr_l ) ), + iobuf->data, iob_len ( iobuf ) ); + + wqe->mpointer[0].byte_count = cpu_to_be32 ( iob_len ( iobuf ) ); + DBG ( "Work queue entry:\n" ); + DBG_HD ( wqe, sizeof ( *wqe ) ); + /* Update previous work queue entry's "next" field */ - nds = ( offsetof ( typeof ( *wqe ), mpointer ) + - sizeof ( wqe->mpointer[0] ) ); + nds = ( ( offsetof ( typeof ( *wqe ), mpointer ) + + sizeof ( wqe->mpointer[0] ) ) >> 4 ); MLX_MODIFY ( &prev_wqe->next.next, arbelprm_wqe_segment_next_st, 0, nopcode, XDEV_NOPCODE_SEND ); MLX_POPULATE_3 ( &prev_wqe->next.next, arbelprm_wqe_segment_next_st, 1, @@ -373,25 +388,30 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, f, 1, always1, 1 ); + DBG ( "Previous work queue entry's next field:\n" ); + DBG_HD ( &prev_wqe->next.next, sizeof ( prev_wqe->next.next ) ); + /* Update doorbell record */ db_rec = &arbel->db_rec[arbel_wq->doorbell_idx]; MLX_POPULATE_1 ( db_rec, arbelprm_qp_db_record_st, 0, - counter, ( wqe_idx & 0xffff ) ); + counter, ( ( wq->next_idx + 1 ) & 0xffff ) ); barrier(); + DBG ( "Doorbell record:\n" ); + DBG_HD ( db_rec, 8 ); /* Ring doorbell register */ MLX_POPULATE_4 ( &db_reg, arbelprm_send_doorbell_st, 0, nopcode, XDEV_NOPCODE_SEND, f, 1, - wqe_counter, ( prev_wqe_idx & 0xffff ), + wqe_counter, ( wq->next_idx & 0xffff ), wqe_cnt, 1 ); MLX_POPULATE_2 ( &db_reg, arbelprm_send_doorbell_st, 1, nds, nds, qpn, qp->qpn ); arbel_ring_doorbell ( arbel, &db_reg, POST_SND_OFFSET ); - /* Update work queue's posted index */ - wq->posted = wqe_idx; + /* Update work queue's index */ + 
wq->next_idx++; return 0; } diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 8b3a2f7c..9337af35 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -68,11 +68,12 @@ struct ibhdr { struct ib_work_queue { /** Number of work queue entries */ unsigned int num_wqes; - /** Posted index + /** Next work queue entry index * - * This is the index of the most recently posted entry. + * This is the index of the next entry to be filled (i.e. the + * first empty entry). */ - unsigned int posted; + unsigned int next_idx; /** I/O buffers assigned to work queue */ struct io_buffer **iobufs; /** Driver private data */ From 37a036bd4844219bf95c9144e8d0595b73c59b39 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 15 Sep 2007 03:22:04 +0100 Subject: [PATCH 24/84] Map the whole of physical memory --- src/drivers/net/mlx_ipoib/ib_mt25218.c | 8 +++++--- src/drivers/net/mlx_ipoib/mt25218.c | 8 ++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/ib_mt25218.c b/src/drivers/net/mlx_ipoib/ib_mt25218.c index 8122a20e..b6552f9f 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt25218.c +++ b/src/drivers/net/mlx_ipoib/ib_mt25218.c @@ -333,9 +333,11 @@ static void prep_sw2hw_mpt_buf(void *buf, __u32 mkey) INS_FLD(1, buf, arbelprm_mpt_st, r_w); INS_FLD(mkey, buf, arbelprm_mpt_st, mem_key); INS_FLD(GLOBAL_PD, buf, arbelprm_mpt_st, pd); - INS_FLD(virt_to_bus(dev_buffers_p), buf, arbelprm_mpt_st, - start_address_l); - INS_FLD(memreg_size, buf, arbelprm_mpt_st, reg_wnd_len_l); + // INS_FLD(virt_to_bus(dev_buffers_p), buf, arbelprm_mpt_st, + // start_address_l); + // INS_FLD(memreg_size, buf, arbelprm_mpt_st, reg_wnd_len_l); + INS_FLD(0xffffffffUL, buf, arbelprm_mpt_st, reg_wnd_len_l); + INS_FLD(0xffffffffUL, buf, arbelprm_mpt_st, reg_wnd_len_h); } static void prep_sw2hw_eq_buf(void *buf, struct eqe_t *eq_buf) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 26e02cd0..ecf873bb 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -366,11 +366,11 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, MLX_POPULATE_1 ( &wqe->udseg, arbelprm_wqe_segment_ud_st, 9, q_key, av->qkey ); - // wqe->mpointer[0].local_addr_l = - // cpu_to_be32 ( virt_to_bus ( iobuf->data ) ); + wqe->mpointer[0].local_addr_l = + cpu_to_be32 ( virt_to_bus ( iobuf->data ) ); - memcpy ( bus_to_virt ( be32_to_cpu ( wqe->mpointer[0].local_addr_l ) ), - iobuf->data, iob_len ( iobuf ) ); + // memcpy ( bus_to_virt ( be32_to_cpu ( wqe->mpointer[0].local_addr_l ) ), + // iobuf->data, iob_len ( iobuf ) ); wqe->mpointer[0].byte_count = cpu_to_be32 ( iob_len ( iobuf ) ); From a3a91fedc11ea3f8de4c0ad3378ea610f1ccb960 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 15 Sep 2007 15:40:35 +0100 Subject: [PATCH 25/84] Started added poll_cq() verb. Started reworking MLX_EXTRACT(), MLX_POPULATE() etc. to automatically determine type information. 
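
The MLX_DECLARE_STRUCT() wrapper below carries the pseudo_bit_t descriptor type
in a zero-length array of pointers, so it adds no space at run time, yet
MLX_PSEUDO_STRUCT() can recover the type with typeof() and the accessor macros
no longer need it named at every call site. A minimal standalone sketch of that
trick, not part of this patch (demo_st and demo_wrapper are invented names;
typeof and zero-length arrays are the GCC extensions this driver already uses):

  #include <stdint.h>
  #include <stdio.h>

  typedef unsigned char pseudo_bit_t;

  struct demo_st {                    /* descriptor: one element per bit */
      pseudo_bit_t flag[1];
      pseudo_bit_t value[31];
  };

  struct demo_wrapper {               /* what MLX_DECLARE_STRUCT() builds */
      union {
          uint8_t bytes[ sizeof ( struct demo_st ) / 8 ];
          uint32_t dwords[ sizeof ( struct demo_st ) / 32 ];
          struct demo_st *dummy[0];   /* zero size: type information only */
      } u;
  };

  /* Recover the descriptor type from a wrapper pointer */
  #define PSEUDO_STRUCT( _ptr ) typeof ( *((_ptr)->u.dummy[0]) )

  /* Field width in bits, without naming the descriptor explicitly */
  #define BIT_WIDTH( _ptr, _field ) \
      sizeof ( ( ( PSEUDO_STRUCT ( _ptr ) * ) NULL )->_field )

  int main ( void ) {
      struct demo_wrapper w;

      printf ( "wrapper is %zu bytes; descriptor describes %zu bits\n",
               sizeof ( w ), sizeof ( struct demo_st ) );
      printf ( "width of \"value\" is %zu bits\n", BIT_WIDTH ( &w, value ) );
      return 0;
  }
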
--- src/drivers/net/mlx_ipoib/bit_ops.h | 165 ++++++++++++++++------------ src/drivers/net/mlx_ipoib/mt25218.c | 103 +++++++++++++++-- src/include/gpxe/infiniband.h | 56 +++++++++- 3 files changed, 240 insertions(+), 84 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/bit_ops.h b/src/drivers/net/mlx_ipoib/bit_ops.h index 969de642..2bc7684d 100644 --- a/src/drivers/net/mlx_ipoib/bit_ops.h +++ b/src/drivers/net/mlx_ipoib/bit_ops.h @@ -137,112 +137,139 @@ struct addr_64_st { /* Remaining code Copyright Fen Systems Ltd. 2007 */ +/** + * Wrapper structure for pseudo_bit_t structures + * + * This structure provides a wrapper around the autogenerated + * pseudo_bit_t structures. It has the correct size, and also + * encapsulates type information about the underlying pseudo_bit_t + * structure, which allows the MLX_POPULATE etc. macros to work + * without requiring explicit type information. + */ +#define MLX_DECLARE_STRUCT( _structure ) \ + _structure { \ + union { \ + uint8_t bytes[ sizeof ( struct _structure ## _st ) / 8 ]; \ + uint32_t dwords[ sizeof ( struct _structure ## _st ) / 32 ]; \ + struct _structure ## _st *dummy[0]; \ + } u; \ + } + +/** Get pseudo_bit_t structure type from wrapper structure pointer */ +#define MLX_PSEUDO_STRUCT( _ptr ) \ + typeof ( *((_ptr)->u.dummy[0]) ) + /** Bit offset of a field within a pseudo_bit_t structure */ -#define MLX_BIT_OFFSET( _structure, _field ) \ - offsetof ( struct _structure, _field ) +#define MLX_BIT_OFFSET( _structure_st, _field ) \ + offsetof ( _structure_st, _field ) /** Dword offset of a field within a pseudo_bit_t structure */ -#define MLX_DWORD_OFFSET( _structure, _field ) \ - ( MLX_BIT_OFFSET ( _structure, _field ) / 32 ) +#define MLX_DWORD_OFFSET( _structure_st, _field ) \ + ( MLX_BIT_OFFSET ( _structure_st, _field ) / 32 ) /** Dword bit offset of a field within a pseudo_bit_t structure * * Yes, using mod-32 would work, but would lose the check for the * error of specifying a mismatched field name and dword index. */ -#define MLX_DWORD_BIT_OFFSET( _structure, _index, _field ) \ - ( MLX_BIT_OFFSET ( _structure, _field ) - ( 32 * (_index) ) ) +#define MLX_DWORD_BIT_OFFSET( _structure_st, _index, _field ) \ + ( MLX_BIT_OFFSET ( _structure_st, _field ) - ( 32 * (_index) ) ) /** Bit width of a field within a pseudo_bit_t structure */ -#define MLX_BIT_WIDTH( _structure, _field ) \ - sizeof ( ( ( struct _structure * ) NULL )->_field ) +#define MLX_BIT_WIDTH( _structure_st, _field ) \ + sizeof ( ( ( _structure_st * ) NULL )->_field ) /** Bit mask for a field within a pseudo_bit_t structure */ -#define MLX_BIT_MASK( _structure, _field ) \ - ( ( 1 << MLX_BIT_WIDTH ( _structure, _field ) ) - 1 ) +#define MLX_BIT_MASK( _structure_st, _field ) \ + ( ( 1 << MLX_BIT_WIDTH ( _structure_st, _field ) ) - 1 ) /* * Assemble native-endian dword from named fields and values * */ -#define MLX_ASSEMBLE_1( _structure, _index, _field, _value ) \ - ( (_value) << MLX_DWORD_BIT_OFFSET ( _structure, _index, _field ) ) +#define MLX_ASSEMBLE_1( _structure_st, _index, _field, _value ) \ + ( (_value) << MLX_DWORD_BIT_OFFSET ( _structure_st, _index, _field ) ) -#define MLX_ASSEMBLE_2( _structure, _index, _field, _value, ... ) \ - ( MLX_ASSEMBLE_1 ( _structure, _index, _field, _value ) | \ - MLX_ASSEMBLE_1 ( _structure, _index, __VA_ARGS__ ) ) +#define MLX_ASSEMBLE_2( _structure_st, _index, _field, _value, ... 
) \ + ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ + MLX_ASSEMBLE_1 ( _structure_st, _index, __VA_ARGS__ ) ) -#define MLX_ASSEMBLE_3( _structure, _index, _field, _value, ... ) \ - ( MLX_ASSEMBLE_1 ( _structure, _index, _field, _value ) | \ - MLX_ASSEMBLE_2 ( _structure, _index, __VA_ARGS__ ) ) +#define MLX_ASSEMBLE_3( _structure_st, _index, _field, _value, ... ) \ + ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ + MLX_ASSEMBLE_2 ( _structure_st, _index, __VA_ARGS__ ) ) -#define MLX_ASSEMBLE_4( _structure, _index, _field, _value, ... ) \ - ( MLX_ASSEMBLE_1 ( _structure, _index, _field, _value ) | \ - MLX_ASSEMBLE_3 ( _structure, _index, __VA_ARGS__ ) ) +#define MLX_ASSEMBLE_4( _structure_st, _index, _field, _value, ... ) \ + ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ + MLX_ASSEMBLE_3 ( _structure_st, _index, __VA_ARGS__ ) ) /* * Build native-endian (positive) dword bitmasks from named fields * */ -#define MLX_MASK_1( _structure, _index, _field ) \ - ( MLX_BIT_MASK ( _structure, _field ) << \ - MLX_DWORD_BIT_OFFSET ( _structure, _index, _field ) ) +#define MLX_MASK_1( _structure_st, _index, _field ) \ + ( MLX_BIT_MASK ( _structure_st, _field ) << \ + MLX_DWORD_BIT_OFFSET ( _structure_st, _index, _field ) ) -#define MLX_MASK_2( _structure, _index, _field, ... ) \ - ( MLX_MASK_1 ( _structure, _index, _field ) | \ - MLX_MASK_1 ( _structure, _index, __VA_ARGS__ ) ) +#define MLX_MASK_2( _structure_st, _index, _field, ... ) \ + ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ + MLX_MASK_1 ( _structure_st, _index, __VA_ARGS__ ) ) -#define MLX_MASK_3( _structure, _index, _field, ... ) \ - ( MLX_MASK_1 ( _structure, _index, _field ) | \ - MLX_MASK_2 ( _structure, _index, __VA_ARGS__ ) ) +#define MLX_MASK_3( _structure_st, _index, _field, ... ) \ + ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ + MLX_MASK_2 ( _structure_st, _index, __VA_ARGS__ ) ) -#define MLX_MASK_4( _structure, _index, _field, ... ) \ - ( MLX_MASK_1 ( _structure, _index, _field ) | \ - MLX_MASK_3 ( _structure, _index, __VA_ARGS__ ) ) +#define MLX_MASK_4( _structure_st, _index, _field, ... ) \ + ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ + MLX_MASK_3 ( _structure_st, _index, __VA_ARGS__ ) ) /* * Populate big-endian dwords from named fields and values * */ -#define MLX_POPULATE( _base, _index, _assembled ) \ - do { \ - uint32_t *__ptr = ( ( (uint32_t *) (_base) ) + (_index) ); \ - uint32_t __assembled = (_assembled); \ - *__ptr = cpu_to_be32 ( __assembled ); \ +#define MLX_POPULATE( _ptr, _index, _assembled ) \ + do { \ + uint32_t *__ptr = &(_ptr)->u.dwords[(_index)]; \ + uint32_t __assembled = (_assembled); \ + *__ptr = cpu_to_be32 ( __assembled ); \ } while ( 0 ) -#define MLX_POPULATE_1( _base, _structure, _index, ... ) \ - MLX_POPULATE ( _base, _index, \ - MLX_ASSEMBLE_1 ( _structure, _index, __VA_ARGS__ ) ) +#define MLX_POPULATE_1( _ptr, _index, ... ) \ + MLX_POPULATE ( _ptr, _index, \ + MLX_ASSEMBLE_1 ( MLX_PSEUDO_STRUCT ( _ptr ), \ + _index, __VA_ARGS__ ) ) -#define MLX_POPULATE_2( _base, _structure, _index, ... ) \ - MLX_POPULATE ( _base, _index, \ - MLX_ASSEMBLE_2 ( _structure, _index, __VA_ARGS__ ) ) +#define MLX_POPULATE_2( _ptr, _index, ... ) \ + MLX_POPULATE ( _ptr, _index, \ + MLX_ASSEMBLE_2 ( MLX_PSEUDO_STRUCT ( _ptr ), \ + _index, __VA_ARGS__ ) ) -#define MLX_POPULATE_3( _base, _structure, _index, ... ) \ - MLX_POPULATE ( _base, _index, \ - MLX_ASSEMBLE_3 ( _structure, _index, __VA_ARGS__ ) ) +#define MLX_POPULATE_3( _ptr, _index, ... 
) \ + MLX_POPULATE ( _ptr, _index, \ + MLX_ASSEMBLE_3 ( MLX_PSEUDO_STRUCT ( _ptr ), \ + _index, __VA_ARGS__ ) ) -#define MLX_POPULATE_4( _base, _structure, _index, ... ) \ - MLX_POPULATE ( _base, _index, \ - MLX_ASSEMBLE_4 ( _structure, _index, __VA_ARGS__ ) ) +#define MLX_POPULATE_4( _ptr, _index, ... ) \ + MLX_POPULATE ( _ptr, _index, \ + MLX_ASSEMBLE_4 ( MLX_PSEUDO_STRUCT ( _ptr ), \ + _index, __VA_ARGS__ ) ) /* * Modify big-endian dword using named field and value * */ -#define MLX_MODIFY( _base, _structure, _index, _field, _value ) \ - do { \ - uint32_t *__ptr = ( ( (uint32_t *) (_base) ) + (_index) ); \ - uint32_t __value = be32_to_cpu ( *__ptr ); \ - __value &= ~( MLX_MASK_1 ( _structure, _index, _field ) ); \ - __value |= MLX_ASSEMBLE_1 ( _structure, _index, \ - _field, _value ); \ - *__ptr = cpu_to_be32 ( __value ); \ +#define MLX_MODIFY( _ptr, _index, _field, _value ) \ + do { \ + uint32_t *__ptr = &(_ptr)->u.dwords[(_index)]; \ + uint32_t __value = be32_to_cpu ( *__ptr ); \ + __value &= ~( MLX_MASK_1 ( MLX_PSEUDO_STRUCT ( _ptr ), \ + _index, _field ) ); \ + __value |= MLX_ASSEMBLE_1 ( MLX_PSEUDO_STRUCT ( _ptr ), \ + _index, _field, _value ); \ + *__ptr = cpu_to_be32 ( __value ); \ } while ( 0 ) /* @@ -250,16 +277,18 @@ struct addr_64_st { * */ -#define MLX_EXTRACT( _base, _structure, _field ) \ - ( { \ - unsigned int __index = \ - MLX_DWORD_OFFSET ( _structure, _field ); \ - uint32_t *__ptr = ( ( (uint32_t *) (_base) ) + __index ); \ - uint32_t __value = be32_to_cpu ( *__ptr ); \ - __value >>= MLX_DWORD_BIT_OFFSET ( _structure, __index, \ - _field ); \ - __value &= MLX_BIT_MASK ( _structure, _field ); \ - __value; \ +#define MLX_EXTRACT( _ptr, _field ) \ + ( { \ + unsigned int __index = \ + MLX_DWORD_OFFSET ( MLX_PSEUDO_STRUCT ( _ptr ), _field ); \ + uint32_t *__ptr = &(_ptr)->u.dwords[__index]; \ + uint32_t __value = be32_to_cpu ( *__ptr ); \ + __value >>= \ + MLX_DWORD_BIT_OFFSET ( MLX_PSEUDO_STRUCT ( _ptr ), \ + __index, _field ); \ + __value &= \ + MLX_BIT_MASK ( MLX_PSEUDO_STRUCT ( _ptr ), _field ); \ + __value; \ } ) #endif /* __bit_ops_h__ */ diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index ecf873bb..42e5465c 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -23,14 +23,23 @@ Skeleton NIC driver for Etherboot #include "mt25218_imp.c" +#include "arbel.h" + struct arbel_send_work_queue { - /** Doorbell number */ + /** Doorbell record number */ unsigned int doorbell_idx; /** Work queue entries */ // struct ud_send_wqe_st *wqe; union ud_send_wqe_u *wqe_u; }; +struct arbel_completion_queue { + /** Doorbell record number */ + unsigned int doorbell_idx; + /** Completion queue entries */ + union cqe_st *cqe; +}; + struct arbel { /** User Access Region */ void *uar; @@ -143,13 +152,14 @@ static int mlx_transmit_direct ( struct net_device *netdev, }, }; struct ud_av_st *bcast_av = mlx->bcast_av; - struct address_vector_st *bav = &bcast_av->av; + struct arbelprm_ud_address_vector *bav = + ( struct arbelprm_ud_address_vector * ) &bcast_av->av; struct ib_address_vector av = { .dest_qp = bcast_av->dest_qp, .qkey = bcast_av->qkey, - .dlid = MLX_EXTRACT ( bav, arbelprm_ud_address_vector_st, rlid ), - .rate = ( MLX_EXTRACT ( bav, arbelprm_ud_address_vector_st, max_stat_rate ) ? 1 : 4 ), - .sl = MLX_EXTRACT ( bav, arbelprm_ud_address_vector_st, sl ), + .dlid = MLX_EXTRACT ( bav, rlid ), + .rate = ( MLX_EXTRACT ( bav, max_stat_rate ) ? 
1 : 4 ), + .sl = MLX_EXTRACT ( bav, sl ), .gid_present = 1, }; memcpy ( &av.gid, ( ( void * ) bav ) + 16, 16 ); @@ -301,6 +311,13 @@ static struct ib_gid arbel_no_gid = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } }; +/** + * Ring doorbell register in UAR + * + * @v arbel Arbel device + * @v db_reg Doorbell register structure + * @v offset Address of doorbell + */ static void arbel_ring_doorbell ( struct arbel *arbel, void *db_reg, unsigned int offset ) { uint32_t *db_reg_dword = db_reg; @@ -315,6 +332,15 @@ static void arbel_ring_doorbell ( struct arbel *arbel, void *db_reg, writel ( db_reg_dword[1], ( arbel->uar + offset + 4 ) ); } +/** + * Post send work queue entry + * + * @v ibdev Infiniband device + * @v iobuf I/O buffer + * @v av Address vector + * @v qp Queue pair + * @ret rc Return status code + */ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, struct ib_address_vector *av, struct ib_queue_pair *qp ) { @@ -365,14 +391,8 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, destination_qp, av->dest_qp ); MLX_POPULATE_1 ( &wqe->udseg, arbelprm_wqe_segment_ud_st, 9, q_key, av->qkey ); - wqe->mpointer[0].local_addr_l = cpu_to_be32 ( virt_to_bus ( iobuf->data ) ); - - // memcpy ( bus_to_virt ( be32_to_cpu ( wqe->mpointer[0].local_addr_l ) ), - // iobuf->data, iob_len ( iobuf ) ); - - wqe->mpointer[0].byte_count = cpu_to_be32 ( iob_len ( iobuf ) ); DBG ( "Work queue entry:\n" ); @@ -416,8 +436,69 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, return 0; } +static void arbel_parse_completion ( struct arbel *arbel, + union cqe_st *cqe, + struct ib_completion *completion ) { + memset ( completion, 0, sizeof ( *completion ) ); + is_send = MLX_EXTRACT ( cqe, arbelprm_completion_queue_entry_st, s ); + completion->len = + MLX_EXTRACT ( cqe, arbelprm_completion_queue_entry_st, + byte_cnt );} + +/** + * Poll completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + * @v complete Completion handler + */ +static void arbel_poll_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq, + ib_completer_t complete_send, + ib_completer_t complete_recv ) { + struct arbel *arbel = ibdev->priv; + struct arbel_completion_queue *arbel_cq = cq->priv; + unsigned int cqe_idx_mask = ( cq->num_cqes - 1 ); + union db_record_st *db_rec = &arbel->db_rec[arbel_cq->doorbell_idx]; + union cqe_st *cqe; + struct ib_completion completion; + struct io_buffer *iobuf; + int is_send; + + while ( 1 ) { + /* Look for completion entry */ + cqe = &arbel_cq->cqe[cq->next_idx & cqe_idx_mask]; + if ( MLX_EXTRACT ( cqe, arbelprm_completion_queue_entry_st, + owner ) != 0 ) { + /* Entry still owned by hardware; end of poll */ + break; + } + + /* Parse completion */ + + + + /* Handle completion */ + ( is_send ? 
complete_send : complete_recv ) ( ibdev, + &completion, + iobuf ); + + /* Return ownership to hardware */ + MLX_POPULATE_1 ( cqe, arbelprm_completion_queue_entry_st, 7, + owner, 1 ); + barrier(); + /* Update completion queue's index */ + cq->next_idx++; + /* Update doorbell record */ + MLX_POPULATE_1 ( db_rec, arbelprm_cq_ci_db_record_st, 0, + counter, ( cq->next_idx & 0xffffffffUL ) ); + } +} + +/** Arbel Infiniband operations */ static struct ib_device_operations arbel_ib_operations = { .post_send = arbel_post_send, + .poll_cq = arbel_poll_cq, }; /** diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 9337af35..c0819158 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -63,6 +63,7 @@ struct ibhdr { +struct ib_device; /** An Infiniband Work Queue */ struct ib_work_queue { @@ -71,9 +72,11 @@ struct ib_work_queue { /** Next work queue entry index * * This is the index of the next entry to be filled (i.e. the - * first empty entry). + * first empty entry). This value is not bounded by num_wqes; + * users must logical-AND with (num_wqes-1) to generate an + * array index. */ - unsigned int next_idx; + unsigned long next_idx; /** I/O buffers assigned to work queue */ struct io_buffer **iobufs; /** Driver private data */ @@ -92,6 +95,38 @@ struct ib_queue_pair { void *priv; }; +/** An Infiniband Completion Queue */ +struct ib_completion_queue { + /** Number of completion queue entries */ + unsigned int num_cqes; + /** Next completion queue entry index + * + * This is the index of the next entry to be filled (i.e. the + * first empty entry). This value is not bounded by num_wqes; + * users must logical-AND with (num_wqes-1) to generate an + * array index. + */ + unsigned long next_idx; + /** Driver private data */ + void *priv; +}; + +/** An Infiniband completion */ +struct ib_completion { + /** Length */ + size_t len; +}; + +/** An Infiniband completion handler + * + * @v ibdev Infiniband device + * @v completion Completion + * @v iobuf I/O buffer + */ +typedef void ( * ib_completer_t ) ( struct ib_device *ibdev, + struct ib_completion *completion, + struct io_buffer *iobuf ); + /** An Infiniband Address Vector */ struct ib_address_vector { /** Destination Queue Pair */ @@ -110,15 +145,13 @@ struct ib_address_vector { struct ib_gid gid; }; -struct ib_device; - /** * Infiniband device operations * * These represent a subset of the Infiniband Verbs. */ struct ib_device_operations { - /** Post Send work queue entry + /** Post send work queue entry * * @v ibdev Infiniband device * @v iobuf I/O buffer @@ -135,6 +168,19 @@ struct ib_device_operations { struct io_buffer *iobuf, struct ib_address_vector *av, struct ib_queue_pair *qp ); + /** Poll completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + * @v complete_send Send completion handler + * @v complete_recv Receive completion handler + * + * The completion handler takes ownership of the I/O buffer. + */ + void ( * poll_cq ) ( struct ib_device *ibdev, + struct ib_completion_queue *cq, + ib_completer_t complete_send, + ib_completer_t complete_recv ); }; /** An Infiniband device */ From 6a791649f0a907c595b5efef5cbb1b2d8d6a9713 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 15 Sep 2007 18:44:09 +0100 Subject: [PATCH 26/84] Updated MLX_* accessor macros to use implicit type information. 
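
Besides renaming the accessors (MLX_POPULATE becomes MLX_FILL, MLX_MODIFY
becomes MLX_SET, MLX_EXTRACT becomes MLX_GET) and letting them pick the
descriptor type up from the wrapper, the hunks below also rework MLX_BIT_MASK:
the old form shifted a 1 left by the field width, which is undefined when a
field spans a whole 32-bit dword, while the new form shifts an all-ones value
right and stays defined for widths 1 to 32. A small standalone sketch of the
difference, not part of this patch (mask_new() is an invented helper):

  #include <stdint.h>
  #include <stdio.h>

  /* Old form: ( ( 1 << width ) - 1 ) is undefined behaviour at width 32,
   * because a 32-bit value may not be shifted by 32 or more bits.
   * New form: defined for every width from 1 to 32. */
  static uint32_t mask_new ( unsigned int width ) {
      return ( ~( ( uint32_t ) 0 ) ) >> ( 32 - width );
  }

  int main ( void ) {
      /* A 16-bit field (e.g. an LID) and a full-dword 32-bit field
       * (e.g. a byte count); the field names are examples only. */
      printf ( "width 16 -> %#x\n", ( unsigned int ) mask_new ( 16 ) );
      printf ( "width 32 -> %#x\n", ( unsigned int ) mask_new ( 32 ) );
      return 0;
  }
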
--- src/drivers/net/mlx_ipoib/arbel.h | 39 ++++++++ src/drivers/net/mlx_ipoib/bit_ops.h | 50 +++++----- src/drivers/net/mlx_ipoib/mt25218.c | 141 +++++++++++++--------------- src/include/gpxe/infiniband.h | 2 + 4 files changed, 132 insertions(+), 100 deletions(-) create mode 100644 src/drivers/net/mlx_ipoib/arbel.h diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h new file mode 100644 index 00000000..e0993044 --- /dev/null +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -0,0 +1,39 @@ +#ifndef _ARBEL_H +#define _ARBEL_H + +struct MLX_DECLARE_STRUCT ( arbelprm_completion_queue_entry ); +struct MLX_DECLARE_STRUCT ( arbelprm_completion_with_error ); +struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record ); +struct MLX_DECLARE_STRUCT ( arbelprm_qp_db_record ); +struct MLX_DECLARE_STRUCT ( arbelprm_send_doorbell ); +struct MLX_DECLARE_STRUCT ( arbelprm_ud_address_vector ); +struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_ctrl_send ); +struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_data_ptr ); +struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_next ); +struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_ud ); + +#define ARBELPRM_MAX_GATHER 1 + +struct arbelprm_ud_send_wqe { + struct arbelprm_wqe_segment_next next; + struct arbelprm_wqe_segment_ctrl_send ctrl; + struct arbelprm_wqe_segment_ud ud; + struct arbelprm_wqe_segment_data_ptr data[ARBELPRM_MAX_GATHER]; +} __attribute__ (( packed )); + +union arbelprm_completion_entry { + struct arbelprm_completion_queue_entry normal; + struct arbelprm_completion_with_error error; +} __attribute__ (( packed )); + +union arbelprm_doorbell_record { + struct arbelprm_cq_ci_db_record cq_ci; + struct arbelprm_qp_db_record qp; +} __attribute__ (( packed )); + +union arbelprm_doorbell_register { + struct arbelprm_send_doorbell send; + uint32_t dword[2]; +} __attribute__ (( packed )); + +#endif /* _ARBEL_H */ diff --git a/src/drivers/net/mlx_ipoib/bit_ops.h b/src/drivers/net/mlx_ipoib/bit_ops.h index 2bc7684d..960d0668 100644 --- a/src/drivers/net/mlx_ipoib/bit_ops.h +++ b/src/drivers/net/mlx_ipoib/bit_ops.h @@ -143,8 +143,8 @@ struct addr_64_st { * This structure provides a wrapper around the autogenerated * pseudo_bit_t structures. It has the correct size, and also * encapsulates type information about the underlying pseudo_bit_t - * structure, which allows the MLX_POPULATE etc. macros to work - * without requiring explicit type information. + * structure, which allows the MLX_FILL etc. macros to work without + * requiring explicit type information. */ #define MLX_DECLARE_STRUCT( _structure ) \ _structure { \ @@ -181,7 +181,8 @@ struct addr_64_st { /** Bit mask for a field within a pseudo_bit_t structure */ #define MLX_BIT_MASK( _structure_st, _field ) \ - ( ( 1 << MLX_BIT_WIDTH ( _structure_st, _field ) ) - 1 ) + ( ( ~( ( uint32_t ) 0 ) ) >> \ + ( 32 - MLX_BIT_WIDTH ( _structure_st, _field ) ) ) /* * Assemble native-endian dword from named fields and values @@ -229,46 +230,45 @@ struct addr_64_st { * */ -#define MLX_POPULATE( _ptr, _index, _assembled ) \ +#define MLX_FILL( _ptr, _index, _assembled ) \ do { \ uint32_t *__ptr = &(_ptr)->u.dwords[(_index)]; \ uint32_t __assembled = (_assembled); \ *__ptr = cpu_to_be32 ( __assembled ); \ } while ( 0 ) -#define MLX_POPULATE_1( _ptr, _index, ... ) \ - MLX_POPULATE ( _ptr, _index, \ - MLX_ASSEMBLE_1 ( MLX_PSEUDO_STRUCT ( _ptr ), \ - _index, __VA_ARGS__ ) ) +#define MLX_FILL_1( _ptr, _index, ... 
) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_1 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) -#define MLX_POPULATE_2( _ptr, _index, ... ) \ - MLX_POPULATE ( _ptr, _index, \ - MLX_ASSEMBLE_2 ( MLX_PSEUDO_STRUCT ( _ptr ), \ - _index, __VA_ARGS__ ) ) +#define MLX_FILL_2( _ptr, _index, ... ) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_2 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) -#define MLX_POPULATE_3( _ptr, _index, ... ) \ - MLX_POPULATE ( _ptr, _index, \ - MLX_ASSEMBLE_3 ( MLX_PSEUDO_STRUCT ( _ptr ), \ - _index, __VA_ARGS__ ) ) +#define MLX_FILL_3( _ptr, _index, ... ) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_3 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) + +#define MLX_FILL_4( _ptr, _index, ... ) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_4 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) -#define MLX_POPULATE_4( _ptr, _index, ... ) \ - MLX_POPULATE ( _ptr, _index, \ - MLX_ASSEMBLE_4 ( MLX_PSEUDO_STRUCT ( _ptr ), \ - _index, __VA_ARGS__ ) ) /* * Modify big-endian dword using named field and value * */ -#define MLX_MODIFY( _ptr, _index, _field, _value ) \ +#define MLX_SET( _ptr, _field, _value ) \ do { \ - uint32_t *__ptr = &(_ptr)->u.dwords[(_index)]; \ + unsigned int __index = \ + MLX_DWORD_OFFSET ( MLX_PSEUDO_STRUCT ( _ptr ), _field ); \ + uint32_t *__ptr = &(_ptr)->u.dwords[__index]; \ uint32_t __value = be32_to_cpu ( *__ptr ); \ __value &= ~( MLX_MASK_1 ( MLX_PSEUDO_STRUCT ( _ptr ), \ - _index, _field ) ); \ + __index, _field ) ); \ __value |= MLX_ASSEMBLE_1 ( MLX_PSEUDO_STRUCT ( _ptr ), \ - _index, _field, _value ); \ + __index, _field, _value ); \ *__ptr = cpu_to_be32 ( __value ); \ } while ( 0 ) @@ -277,7 +277,7 @@ struct addr_64_st { * */ -#define MLX_EXTRACT( _ptr, _field ) \ +#define MLX_GET( _ptr, _field ) \ ( { \ unsigned int __index = \ MLX_DWORD_OFFSET ( MLX_PSEUDO_STRUCT ( _ptr ), _field ); \ diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 42e5465c..0453ba79 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -37,14 +37,14 @@ struct arbel_completion_queue { /** Doorbell record number */ unsigned int doorbell_idx; /** Completion queue entries */ - union cqe_st *cqe; + union arbelprm_completion_entry *cqe; }; struct arbel { /** User Access Region */ void *uar; /** Doorbell records */ - union db_record_st *db_rec; + union arbelprm_doorbell_record *db_rec; }; @@ -157,9 +157,9 @@ static int mlx_transmit_direct ( struct net_device *netdev, struct ib_address_vector av = { .dest_qp = bcast_av->dest_qp, .qkey = bcast_av->qkey, - .dlid = MLX_EXTRACT ( bav, rlid ), - .rate = ( MLX_EXTRACT ( bav, max_stat_rate ) ? 1 : 4 ), - .sl = MLX_EXTRACT ( bav, sl ), + .dlid = MLX_GET ( bav, rlid ), + .rate = ( MLX_GET ( bav, max_stat_rate ) ? 
1 : 4 ), + .sl = MLX_GET ( bav, sl ), .gid_present = 1, }; memcpy ( &av.gid, ( ( void * ) bav ) + 16, 16 ); @@ -318,18 +318,18 @@ static struct ib_gid arbel_no_gid = { * @v db_reg Doorbell register structure * @v offset Address of doorbell */ -static void arbel_ring_doorbell ( struct arbel *arbel, void *db_reg, +static void arbel_ring_doorbell ( struct arbel *arbel, + union arbelprm_doorbell_register *db_reg, unsigned int offset ) { - uint32_t *db_reg_dword = db_reg; DBG ( "arbel_ring_doorbell %08lx:%08lx to %lx\n", - db_reg_dword[0], db_reg_dword[1], + db_reg->dword[0], db_reg->dword[1], virt_to_phys ( arbel->uar + offset ) ); barrier(); - writel ( db_reg_dword[0], ( arbel->uar + offset + 0 ) ); + writel ( db_reg->dword[0], ( arbel->uar + offset + 0 ) ); barrier(); - writel ( db_reg_dword[1], ( arbel->uar + offset + 4 ) ); + writel ( db_reg->dword[1], ( arbel->uar + offset + 4 ) ); } /** @@ -347,15 +347,16 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, struct arbel *arbel = ibdev->priv; struct ib_work_queue *wq = &qp->send; struct arbel_send_work_queue *arbel_wq = wq->priv; - unsigned int wqe_idx_mask = ( wq->num_wqes - 1 ); - struct ud_send_wqe_st *prev_wqe; - struct ud_send_wqe_st *wqe; + struct arbelprm_ud_send_wqe *prev_wqe; + struct arbelprm_ud_send_wqe *wqe; + union arbelprm_doorbell_record *db_rec; + union arbelprm_doorbell_register db_reg; struct ib_gid *gid; + unsigned int wqe_idx_mask; size_t nds; - union db_record_st *db_rec; - struct send_doorbell_st db_reg; /* Allocate work queue entry */ + wqe_idx_mask = ( wq->num_wqes - 1 ); if ( wq->iobufs[wq->next_idx & wqe_idx_mask] ) { DBGC ( arbel, "ARBEL %p send queue full", arbel ); return -ENOBUFS; @@ -365,69 +366,61 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, wqe = &arbel_wq->wqe_u[wq->next_idx & wqe_idx_mask].wqe_cont.wqe; /* Construct work queue entry */ - MLX_POPULATE_1 ( &wqe->next.next, arbelprm_wqe_segment_next_st, 1, - always1, 1 ); - memset ( &wqe->next.control, 0, - sizeof ( wqe->next.control ) ); - MLX_POPULATE_1 ( &wqe->next.control, - arbelprm_wqe_segment_ctrl_send_st, 0, - always1, 1 ); - memset ( &wqe->udseg, 0, sizeof ( wqe->udseg ) ); - MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 0, - pd, GLOBAL_PD, - port_number, PXE_IB_PORT ); - MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 1, - rlid, av->dlid, - g, av->gid_present ); - MLX_POPULATE_2 ( &wqe->udseg, arbelprm_ud_address_vector_st, 2, - max_stat_rate, ( ( av->rate >= 3 ) ? 0 : 1 ), - msg, 3 ); - MLX_POPULATE_1 ( &wqe->udseg, arbelprm_ud_address_vector_st, 3, - sl, av->sl ); + MLX_FILL_1 ( &wqe->next, 1, always1, 1 ); + memset ( &wqe->ctrl, 0, sizeof ( wqe->ctrl ) ); + MLX_FILL_1 ( &wqe->ctrl, 0, always1, 1 ); + memset ( &wqe->ud, 0, sizeof ( wqe->ud ) ); + MLX_FILL_2 ( &wqe->ud, 0, + ud_address_vector.pd, GLOBAL_PD, + ud_address_vector.port_number, PXE_IB_PORT ); + MLX_FILL_2 ( &wqe->ud, 1, + ud_address_vector.rlid, av->dlid, + ud_address_vector.g, av->gid_present ); + MLX_FILL_2 ( &wqe->ud, 2, + ud_address_vector.max_stat_rate, + ( ( av->rate >= 3 ) ? 0 : 1 ), + ud_address_vector.msg, 3 ); + MLX_FILL_1 ( &wqe->ud, 3, ud_address_vector.sl, av->sl ); gid = ( av->gid_present ? 
&av->gid : &arbel_no_gid ); - memcpy ( ( ( ( void * ) &wqe->udseg ) + 16 ), - gid, sizeof ( *gid ) ); - MLX_POPULATE_1 ( &wqe->udseg, arbelprm_wqe_segment_ud_st, 8, - destination_qp, av->dest_qp ); - MLX_POPULATE_1 ( &wqe->udseg, arbelprm_wqe_segment_ud_st, 9, - q_key, av->qkey ); - wqe->mpointer[0].local_addr_l = - cpu_to_be32 ( virt_to_bus ( iobuf->data ) ); - wqe->mpointer[0].byte_count = cpu_to_be32 ( iob_len ( iobuf ) ); + memcpy ( &wqe->ud.u.dwords[4], gid, sizeof ( *gid ) ); + MLX_FILL_1 ( &wqe->ud, 8, destination_qp, av->dest_qp ); + MLX_FILL_1 ( &wqe->ud, 9, q_key, av->qkey ); + MLX_FILL_1 ( &wqe->data[0], 3, + local_address_l, virt_to_bus ( iobuf->data ) ); + MLX_FILL_1 ( &wqe->data[0], 0, byte_count, iob_len ( iobuf ) ); DBG ( "Work queue entry:\n" ); DBG_HD ( wqe, sizeof ( *wqe ) ); /* Update previous work queue entry's "next" field */ - nds = ( ( offsetof ( typeof ( *wqe ), mpointer ) + - sizeof ( wqe->mpointer[0] ) ) >> 4 ); - MLX_MODIFY ( &prev_wqe->next.next, arbelprm_wqe_segment_next_st, 0, - nopcode, XDEV_NOPCODE_SEND ); - MLX_POPULATE_3 ( &prev_wqe->next.next, arbelprm_wqe_segment_next_st, 1, - nds, nds, - f, 1, - always1, 1 ); + nds = ( ( offsetof ( typeof ( *wqe ), data ) + + sizeof ( wqe->data[0] ) ) >> 4 ); + MLX_SET ( &prev_wqe->next, nopcode, XDEV_NOPCODE_SEND ); + MLX_FILL_3 ( &prev_wqe->next, 1, + nds, nds, + f, 1, + always1, 1 ); DBG ( "Previous work queue entry's next field:\n" ); - DBG_HD ( &prev_wqe->next.next, sizeof ( prev_wqe->next.next ) ); + DBG_HD ( &prev_wqe->next, sizeof ( prev_wqe->next ) ); /* Update doorbell record */ db_rec = &arbel->db_rec[arbel_wq->doorbell_idx]; - MLX_POPULATE_1 ( db_rec, arbelprm_qp_db_record_st, 0, - counter, ( ( wq->next_idx + 1 ) & 0xffff ) ); + MLX_FILL_1 ( &db_rec->qp, 0, + counter, ( ( wq->next_idx + 1 ) & 0xffff ) ); barrier(); DBG ( "Doorbell record:\n" ); DBG_HD ( db_rec, 8 ); /* Ring doorbell register */ - MLX_POPULATE_4 ( &db_reg, arbelprm_send_doorbell_st, 0, - nopcode, XDEV_NOPCODE_SEND, - f, 1, - wqe_counter, ( wq->next_idx & 0xffff ), - wqe_cnt, 1 ); - MLX_POPULATE_2 ( &db_reg, arbelprm_send_doorbell_st, 1, - nds, nds, - qpn, qp->qpn ); + MLX_FILL_4 ( &db_reg.send, 0, + nopcode, XDEV_NOPCODE_SEND, + f, 1, + wqe_counter, ( wq->next_idx & 0xffff ), + wqe_cnt, 1 ); + MLX_FILL_2 ( &db_reg.send, 1, + nds, nds, + qpn, qp->qpn ); arbel_ring_doorbell ( arbel, &db_reg, POST_SND_OFFSET ); /* Update work queue's index */ @@ -437,13 +430,12 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, } static void arbel_parse_completion ( struct arbel *arbel, - union cqe_st *cqe, + union arbelprm_completion_entry *cqe, struct ib_completion *completion ) { memset ( completion, 0, sizeof ( *completion ) ); - is_send = MLX_EXTRACT ( cqe, arbelprm_completion_queue_entry_st, s ); - completion->len = - MLX_EXTRACT ( cqe, arbelprm_completion_queue_entry_st, - byte_cnt );} + completion->is_send = MLX_GET ( &cqe->normal, s ); + completion->len = MLX_GET ( &cqe->normal, byte_cnt ); +} /** * Poll completion queue @@ -459,8 +451,8 @@ static void arbel_poll_cq ( struct ib_device *ibdev, struct arbel *arbel = ibdev->priv; struct arbel_completion_queue *arbel_cq = cq->priv; unsigned int cqe_idx_mask = ( cq->num_cqes - 1 ); - union db_record_st *db_rec = &arbel->db_rec[arbel_cq->doorbell_idx]; - union cqe_st *cqe; + union arbelprm_doorbell_record *db_rec; + union arbelprm_completion_entry *cqe; struct ib_completion completion; struct io_buffer *iobuf; int is_send; @@ -468,8 +460,7 @@ static void arbel_poll_cq ( struct 
ib_device *ibdev, while ( 1 ) { /* Look for completion entry */ cqe = &arbel_cq->cqe[cq->next_idx & cqe_idx_mask]; - if ( MLX_EXTRACT ( cqe, arbelprm_completion_queue_entry_st, - owner ) != 0 ) { + if ( MLX_GET ( &cqe->normal, owner ) != 0 ) { /* Entry still owned by hardware; end of poll */ break; } @@ -484,14 +475,14 @@ static void arbel_poll_cq ( struct ib_device *ibdev, iobuf ); /* Return ownership to hardware */ - MLX_POPULATE_1 ( cqe, arbelprm_completion_queue_entry_st, 7, - owner, 1 ); + MLX_FILL_1 ( &cqe->normal, 7, owner, 1 ); barrier(); /* Update completion queue's index */ cq->next_idx++; /* Update doorbell record */ - MLX_POPULATE_1 ( db_rec, arbelprm_cq_ci_db_record_st, 0, - counter, ( cq->next_idx & 0xffffffffUL ) ); + db_rec = &arbel->db_rec[arbel_cq->doorbell_idx]; + MLX_FILL_1 ( &db_rec->cq_ci, 0, + counter, ( cq->next_idx & 0xffffffffUL ) ); } } diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index c0819158..72a85d42 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -113,6 +113,8 @@ struct ib_completion_queue { /** An Infiniband completion */ struct ib_completion { + /** Completion is for send queue */ + int is_send; /** Length */ size_t len; }; From 687afdcdd9df6803498affe13b5de9d9146f72d4 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 15 Sep 2007 18:44:35 +0100 Subject: [PATCH 27/84] Add const attribute to byte-swapping functions --- src/arch/i386/include/bits/byteswap.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/arch/i386/include/bits/byteswap.h b/src/arch/i386/include/bits/byteswap.h index 16e31f34..54b93ab9 100644 --- a/src/arch/i386/include/bits/byteswap.h +++ b/src/arch/i386/include/bits/byteswap.h @@ -1,7 +1,7 @@ #ifndef ETHERBOOT_BITS_BYTESWAP_H #define ETHERBOOT_BITS_BYTESWAP_H -static inline __attribute__ ((always_inline)) uint16_t +static inline __attribute__ ((always_inline, const)) uint16_t __i386_bswap_16(uint16_t x) { __asm__("xchgb %b0,%h0\n\t" @@ -10,7 +10,7 @@ __i386_bswap_16(uint16_t x) return x; } -static inline __attribute__ ((always_inline)) uint32_t +static inline __attribute__ ((always_inline, const)) uint32_t __i386_bswap_32(uint32_t x) { __asm__("xchgb %b0,%h0\n\t" @@ -21,7 +21,7 @@ __i386_bswap_32(uint32_t x) return x; } -static inline __attribute__ ((always_inline)) uint64_t +static inline __attribute__ ((always_inline, const)) uint64_t __i386_bswap_64(uint64_t x) { union { From 8deef093d90d8558925cc4c6159b8e2bcf8b02e3 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 15 Sep 2007 20:58:29 +0100 Subject: [PATCH 28/84] Direct polling of TX completion queue now works. 
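
The poll loop below relies on an ownership bit in each completion queue entry:
an entry whose owner bit reads zero belongs to software, and once it has been
handled the driver flips the bit back to hardware, advances its free-running
consumer counter, and writes the counter to the CQ doorbell record so the entry
may be reused. A simplified standalone model of that handshake, not part of
this patch (struct fake_cqe and hw_post() stand in for the real hardware):

  #include <stdio.h>

  #define NUM_CQES 4                  /* power of two */

  struct fake_cqe {
      int owner;                      /* 1 = hardware owns it, 0 = software */
      int byte_cnt;
  };

  static struct fake_cqe cq[NUM_CQES];
  static unsigned long next_idx;      /* free-running consumer counter */

  /* Pretend the hardware just completed something */
  static void hw_post ( unsigned int idx, int byte_cnt ) {
      cq[idx & ( NUM_CQES - 1 )].byte_cnt = byte_cnt;
      cq[idx & ( NUM_CQES - 1 )].owner = 0;
  }

  static void poll_cq ( void ) {
      struct fake_cqe *cqe;

      while ( 1 ) {
          cqe = &cq[next_idx & ( NUM_CQES - 1 )];
          if ( cqe->owner != 0 )
              break;                  /* still owned by hardware */
          printf ( "completion: %d bytes\n", cqe->byte_cnt );
          cqe->owner = 1;             /* return the entry to hardware */
          next_idx++;                 /* advance the consumer counter */
          /* the real driver also writes next_idx to the CQ consumer
           * index doorbell record at this point */
      }
  }

  int main ( void ) {
      unsigned int i;

      for ( i = 0 ; i < NUM_CQES ; i++ )
          cq[i].owner = 1;            /* hardware owns every entry */
      hw_post ( 0, 60 );
      hw_post ( 1, 1500 );
      poll_cq();                      /* prints two completions */
      poll_cq();                      /* nothing left: prints nothing */
      return 0;
  }
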
--- src/drivers/net/mlx_ipoib/arbel.h | 79 +++++++++ src/drivers/net/mlx_ipoib/mt25218.c | 251 +++++++++++++++++++++------- src/include/gpxe/infiniband.h | 44 +++-- src/net/infiniband.c | 20 +++ 4 files changed, 319 insertions(+), 75 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index e0993044..f35ef26b 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -1,6 +1,26 @@ #ifndef _ARBEL_H #define _ARBEL_H +/** @file + * + * Mellanox Arbel Infiniband HCA driver + * + */ + +/* + * Hardware constants + * + */ + +#define ARBEL_OPCODE_SEND 0x0a +#define ARBEL_OPCODE_RECV_ERROR 0xfe +#define ARBEL_OPCODE_SEND_ERROR 0xff + +/* + * Wrapper structures for hardware datatypes + * + */ + struct MLX_DECLARE_STRUCT ( arbelprm_completion_queue_entry ); struct MLX_DECLARE_STRUCT ( arbelprm_completion_with_error ); struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record ); @@ -12,6 +32,11 @@ struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_data_ptr ); struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_next ); struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_ud ); +/* + * Composite hardware datatypes + * + */ + #define ARBELPRM_MAX_GATHER 1 struct arbelprm_ud_send_wqe { @@ -36,4 +61,58 @@ union arbelprm_doorbell_register { uint32_t dword[2]; } __attribute__ (( packed )); +/* + * gPXE-specific definitions + * + */ + +/** Alignment of Arbel send work queue entries */ +#define ARBEL_SEND_WQE_ALIGN 128 + +/** An Arbel send work queue entry */ +union arbel_send_wqe { + struct arbelprm_ud_send_wqe ud; + uint8_t force_align[ARBEL_SEND_WQE_ALIGN]; +} __attribute__ (( packed )); + +/** An Arbel send work queue */ +struct arbel_send_work_queue { + /** Doorbell record number */ + unsigned int doorbell_idx; + /** Work queue entries */ + union arbel_send_wqe *wqe; +}; + +/** Alignment of Arbel receive work queue entries */ +#define ARBEL_RECV_WQE_ALIGN 64 + +/** An Arbel receive work queue entry */ +union arbel_recv_wqe { + uint8_t force_align[ARBEL_RECV_WQE_ALIGN]; +} __attribute__ (( packed )); + +/** An Arbel receive work queue */ +struct arbel_recv_work_queue { + /** Doorbell record number */ + unsigned int doorbell_idx; + /** Work queue entries */ + union arbel_recv_wqe *wqe; +}; + +/** An Arbel completion queue */ +struct arbel_completion_queue { + /** Doorbell record number */ + unsigned int doorbell_idx; + /** Completion queue entries */ + union arbelprm_completion_entry *cqe; +}; + +/** An Arbel device */ +struct arbel { + /** User Access Region */ + void *uar; + /** Doorbell records */ + union arbelprm_doorbell_record *db_rec; +}; + #endif /* _ARBEL_H */ diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 0453ba79..be114b94 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -25,28 +25,6 @@ Skeleton NIC driver for Etherboot #include "arbel.h" -struct arbel_send_work_queue { - /** Doorbell record number */ - unsigned int doorbell_idx; - /** Work queue entries */ - // struct ud_send_wqe_st *wqe; - union ud_send_wqe_u *wqe_u; -}; - -struct arbel_completion_queue { - /** Doorbell record number */ - unsigned int doorbell_idx; - /** Completion queue entries */ - union arbelprm_completion_entry *cqe; -}; - -struct arbel { - /** User Access Region */ - void *uar; - /** Doorbell records */ - union arbelprm_doorbell_record *db_rec; -}; - struct mlx_nic { @@ -119,9 +97,10 @@ static int mlx_transmit ( struct net_device *netdev, return 0; } -static int 
arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, +static int arbel_post_send ( struct ib_device *ibdev, + struct ib_queue_pair *qp, struct ib_address_vector *av, - struct ib_queue_pair *qp ); + struct io_buffer *iobuf ); static struct io_buffer *tx_ring[NUM_IPOIB_SND_WQES]; static int next_tx_idx = 0; @@ -137,10 +116,10 @@ static int mlx_transmit_direct ( struct net_device *netdev, }; struct arbel_send_work_queue arbel_send_queue = { .doorbell_idx = IPOIB_SND_QP_DB_IDX, - .wqe_u = ( (struct udqp_st *) mlx->ipoib_qph )->snd_wq, + .wqe = ( (struct udqp_st *) mlx->ipoib_qph )->snd_wq, }; struct ib_device ibdev = { - .priv = &arbel, + .dev_priv = &arbel, }; struct ib_queue_pair qp = { .qpn = ib_get_qpn ( mlx->ipoib_qph ), @@ -148,7 +127,7 @@ static int mlx_transmit_direct ( struct net_device *netdev, .num_wqes = NUM_IPOIB_SND_WQES, .next_idx = next_tx_idx, .iobufs = tx_ring, - .priv = &arbel_send_queue, + .dev_priv = &arbel_send_queue, }, }; struct ud_av_st *bcast_av = mlx->bcast_av; @@ -164,7 +143,7 @@ static int mlx_transmit_direct ( struct net_device *netdev, }; memcpy ( &av.gid, ( ( void * ) bav ) + 16, 16 ); - rc = arbel_post_send ( &ibdev, iobuf, &av, &qp ); + rc = arbel_post_send ( &ibdev, &qp, &av, iobuf ); next_tx_idx = qp.send.next_idx; @@ -172,6 +151,75 @@ static int mlx_transmit_direct ( struct net_device *netdev, } +static void arbel_poll_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq, + ib_completer_t complete_send, + ib_completer_t complete_recv ); + +static void temp_complete_send ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->priv; + + DBG ( "Wahey! TX completion\n" ); + netdev_tx_complete_err ( netdev, iobuf, + ( completion->syndrome ? -EIO : 0 ) ); +} + +static void temp_complete_recv ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp __unused, + struct ib_completion *completion __unused, + struct io_buffer *iobuf __unused ) { + DBG ( "AARGH! 
recv completion\n" ); +} + +static int next_cq_idx = 0; + +static void mlx_poll_cq_direct ( struct net_device *netdev ) { + struct mlx_nic *mlx = netdev->priv; + + struct arbel arbel = { + .uar = memfree_pci_dev.uar, + .db_rec = dev_ib_data.uar_context_base, + }; + struct arbel_send_work_queue arbel_send_queue = { + .doorbell_idx = IPOIB_SND_QP_DB_IDX, + .wqe = ( ( struct udqp_st * ) mlx->ipoib_qph )->snd_wq, + }; + struct ib_device ibdev = { + .dev_priv = &arbel, + }; + struct ib_queue_pair qp = { + .qpn = ib_get_qpn ( mlx->ipoib_qph ), + .send = { + .num_wqes = NUM_IPOIB_SND_WQES, + .next_idx = next_tx_idx, + .iobufs = tx_ring, + .dev_priv = &arbel_send_queue, + }, + .priv = netdev, + }; + struct arbel_completion_queue arbel_cq = { + .doorbell_idx = IPOIB_SND_CQ_CI_DB_IDX, + .cqe = ( ( struct cq_st * ) mlx->snd_cqh )->cq_buf, + }; + struct ib_completion_queue cq = { + .cqn = 1234, + .num_cqes = NUM_IPOIB_SND_CQES, + .next_idx = next_cq_idx, + .dev_priv = &arbel_cq, + }; + + INIT_LIST_HEAD ( &cq.queue_pairs ); + INIT_LIST_HEAD ( &qp.list ); + list_add ( &qp.list, &cq.queue_pairs ); + + arbel_poll_cq ( &ibdev, &cq, temp_complete_send, temp_complete_recv ); + + next_cq_idx = cq.next_idx; +} + /** * Handle TX completion * @@ -276,7 +324,11 @@ static void mlx_poll ( struct net_device *netdev ) { } /* Poll completion queues */ +#if 0 mlx_poll_cq ( netdev, mlx->snd_cqh, mlx_tx_complete ); +#else + mlx_poll_cq_direct ( netdev ); +#endif mlx_poll_cq ( netdev, mlx->rcv_cqh, mlx_rx_complete ); } @@ -336,17 +388,18 @@ static void arbel_ring_doorbell ( struct arbel *arbel, * Post send work queue entry * * @v ibdev Infiniband device - * @v iobuf I/O buffer - * @v av Address vector * @v qp Queue pair + * @v av Address vector + * @v iobuf I/O buffer * @ret rc Return status code */ -static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, +static int arbel_post_send ( struct ib_device *ibdev, + struct ib_queue_pair *qp, struct ib_address_vector *av, - struct ib_queue_pair *qp ) { - struct arbel *arbel = ibdev->priv; + struct io_buffer *iobuf ) { + struct arbel *arbel = ibdev->dev_priv; struct ib_work_queue *wq = &qp->send; - struct arbel_send_work_queue *arbel_wq = wq->priv; + struct arbel_send_work_queue *arbel_send_wq = wq->dev_priv; struct arbelprm_ud_send_wqe *prev_wqe; struct arbelprm_ud_send_wqe *wqe; union arbelprm_doorbell_record *db_rec; @@ -358,12 +411,12 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, /* Allocate work queue entry */ wqe_idx_mask = ( wq->num_wqes - 1 ); if ( wq->iobufs[wq->next_idx & wqe_idx_mask] ) { - DBGC ( arbel, "ARBEL %p send queue full", arbel ); + DBGC ( arbel, "Arbel %p send queue full", arbel ); return -ENOBUFS; } wq->iobufs[wq->next_idx & wqe_idx_mask] = iobuf; - prev_wqe = &arbel_wq->wqe_u[(wq->next_idx - 1) & wqe_idx_mask].wqe_cont.wqe; - wqe = &arbel_wq->wqe_u[wq->next_idx & wqe_idx_mask].wqe_cont.wqe; + prev_wqe = &arbel_send_wq->wqe[(wq->next_idx - 1) & wqe_idx_mask].ud; + wqe = &arbel_send_wq->wqe[wq->next_idx & wqe_idx_mask].ud; /* Construct work queue entry */ MLX_FILL_1 ( &wqe->next, 1, always1, 1 ); @@ -395,7 +448,7 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, /* Update previous work queue entry's "next" field */ nds = ( ( offsetof ( typeof ( *wqe ), data ) + sizeof ( wqe->data[0] ) ) >> 4 ); - MLX_SET ( &prev_wqe->next, nopcode, XDEV_NOPCODE_SEND ); + MLX_SET ( &prev_wqe->next, nopcode, ARBEL_OPCODE_SEND ); MLX_FILL_3 ( &prev_wqe->next, 1, nds, nds, f, 1, @@ -405,7 
+458,7 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, DBG_HD ( &prev_wqe->next, sizeof ( prev_wqe->next ) ); /* Update doorbell record */ - db_rec = &arbel->db_rec[arbel_wq->doorbell_idx]; + db_rec = &arbel->db_rec[arbel_send_wq->doorbell_idx]; MLX_FILL_1 ( &db_rec->qp, 0, counter, ( ( wq->next_idx + 1 ) & 0xffff ) ); barrier(); @@ -414,7 +467,7 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, /* Ring doorbell register */ MLX_FILL_4 ( &db_reg.send, 0, - nopcode, XDEV_NOPCODE_SEND, + nopcode, ARBEL_OPCODE_SEND, f, 1, wqe_counter, ( wq->next_idx & 0xffff ), wqe_cnt, 1 ); @@ -429,50 +482,126 @@ static int arbel_post_send ( struct ib_device *ibdev, struct io_buffer *iobuf, return 0; } -static void arbel_parse_completion ( struct arbel *arbel, - union arbelprm_completion_entry *cqe, - struct ib_completion *completion ) { - memset ( completion, 0, sizeof ( *completion ) ); - completion->is_send = MLX_GET ( &cqe->normal, s ); - completion->len = MLX_GET ( &cqe->normal, byte_cnt ); -} +/** + * Handle completion + * + * @v ibdev Infiniband device + * @v cq Completion queue + * @v cqe Hardware completion queue entry + * @v complete_send Send completion handler + * @v complete_recv Receive completion handler + * @ret rc Return status code + */ +static int arbel_complete ( struct ib_device *ibdev, + struct ib_completion_queue *cq, + union arbelprm_completion_entry *cqe, + ib_completer_t complete_send, + ib_completer_t complete_recv ) { + struct arbel *arbel = ibdev->dev_priv; + struct ib_completion completion; + struct ib_queue_pair *qp; + struct ib_work_queue *wq; + struct io_buffer *iobuf; + struct arbel_send_work_queue *arbel_send_wq; + struct arbel_recv_work_queue *arbel_recv_wq; + ib_completer_t complete; + unsigned int opcode; + unsigned long qpn; + unsigned int is_send; + unsigned long wqe_adr; + unsigned int wqe_idx; + int rc = 0; + + /* Parse completion */ + memset ( &completion, 0, sizeof ( completion ) ); + completion.len = MLX_GET ( &cqe->normal, byte_cnt ); + qpn = MLX_GET ( &cqe->normal, my_qpn ); + is_send = MLX_GET ( &cqe->normal, s ); + wqe_adr = ( MLX_GET ( &cqe->normal, wqe_adr ) << 6 ); + opcode = MLX_GET ( &cqe->normal, opcode ); + if ( opcode >= ARBEL_OPCODE_RECV_ERROR ) { + /* "s" field is not valid for error opcodes */ + is_send = ( opcode == ARBEL_OPCODE_SEND_ERROR ); + completion.syndrome = MLX_GET ( &cqe->error, syndrome ); + DBGC ( arbel, "Arbel %p CPN %lx syndrome %x vendor %lx\n", + arbel, cq->cqn, completion.syndrome, + MLX_GET ( &cqe->error, vendor_code ) ); + rc = -EIO; + /* Don't return immediately; propagate error to completer */ + } + + /* Identify queue pair */ + qp = ib_find_qp ( &cq->queue_pairs, qpn ); + if ( ! qp ) { + DBGC ( arbel, "Arbel %p CQN %lx unknown QPN %lx\n", + arbel, cq->cqn, qpn ); + return -EIO; + } + + /* Identify work queue entry index */ + if ( is_send ) { + wq = &qp->send; + arbel_send_wq = wq->dev_priv; + wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_send_wq->wqe ) ) / + sizeof ( arbel_send_wq->wqe[0] ) ); + } else { + wq = &qp->recv; + arbel_recv_wq = wq->dev_priv; + wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_recv_wq->wqe ) ) / + sizeof ( arbel_recv_wq->wqe[0] ) ); + } + + /* Identify I/O buffer */ + iobuf = wq->iobufs[wqe_idx]; + if ( ! iobuf ) { + DBGC ( arbel, "Arbel %p CQN %lx QPN %lx empty WQE %x\n", + arbel, cq->cqn, qpn, wqe_idx ); + return -EIO; + } + wq->iobufs[wqe_idx] = NULL; + + /* Pass off to caller's completion handler */ + complete = ( is_send ? 
complete_send : complete_recv ); + complete ( ibdev, qp, &completion, iobuf ); + + return rc; +} /** * Poll completion queue * * @v ibdev Infiniband device * @v cq Completion queue - * @v complete Completion handler + * @v complete_send Send completion handler + * @v complete_recv Receive completion handler */ static void arbel_poll_cq ( struct ib_device *ibdev, struct ib_completion_queue *cq, ib_completer_t complete_send, ib_completer_t complete_recv ) { - struct arbel *arbel = ibdev->priv; - struct arbel_completion_queue *arbel_cq = cq->priv; - unsigned int cqe_idx_mask = ( cq->num_cqes - 1 ); + struct arbel *arbel = ibdev->dev_priv; + struct arbel_completion_queue *arbel_cq = cq->dev_priv; union arbelprm_doorbell_record *db_rec; union arbelprm_completion_entry *cqe; - struct ib_completion completion; - struct io_buffer *iobuf; - int is_send; + unsigned int cqe_idx_mask; + int rc; while ( 1 ) { /* Look for completion entry */ + cqe_idx_mask = ( cq->num_cqes - 1 ); cqe = &arbel_cq->cqe[cq->next_idx & cqe_idx_mask]; if ( MLX_GET ( &cqe->normal, owner ) != 0 ) { /* Entry still owned by hardware; end of poll */ break; } - /* Parse completion */ - - - /* Handle completion */ - ( is_send ? complete_send : complete_recv ) ( ibdev, - &completion, - iobuf ); + if ( ( rc = arbel_complete ( ibdev, cq, cqe, complete_send, + complete_recv ) ) != 0 ) { + DBGC ( arbel, "Arbel %p failed to complete: %s\n", + arbel, strerror ( rc ) ); + DBGC_HD ( arbel, cqe, sizeof ( *cqe ) ); + } /* Return ownership to hardware */ MLX_FILL_1 ( &cqe->normal, 7, owner, 1 ); diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 72a85d42..3679a110 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -79,24 +79,30 @@ struct ib_work_queue { unsigned long next_idx; /** I/O buffers assigned to work queue */ struct io_buffer **iobufs; - /** Driver private data */ - void *priv; + /** Device private data */ + void *dev_priv; }; /** An Infiniband Queue Pair */ struct ib_queue_pair { + /** List of queue pairs sharing a completion queue */ + struct list_head list; /** Queue Pair Number */ - uint32_t qpn; + unsigned long qpn; /** Send queue */ struct ib_work_queue send; /** Receive queue */ struct ib_work_queue recv; - /** Driver private data */ + /** Queue owner private data */ void *priv; + /** Device private data */ + void *dev_priv; }; /** An Infiniband Completion Queue */ struct ib_completion_queue { + /** Completion queue number */ + unsigned long cqn; /** Number of completion queue entries */ unsigned int num_cqes; /** Next completion queue entry index @@ -107,14 +113,19 @@ struct ib_completion_queue { * array index. */ unsigned long next_idx; - /** Driver private data */ - void *priv; + /** List of associated queue pairs */ + struct list_head queue_pairs; + /** Device private data */ + void *dev_priv; }; /** An Infiniband completion */ struct ib_completion { - /** Completion is for send queue */ - int is_send; + /** Syndrome + * + * If non-zero, then the completion is in error. 
+ */ + unsigned int syndrome; /** Length */ size_t len; }; @@ -122,10 +133,12 @@ struct ib_completion { /** An Infiniband completion handler * * @v ibdev Infiniband device + * @v qp Queue pair * @v completion Completion * @v iobuf I/O buffer */ typedef void ( * ib_completer_t ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp, struct ib_completion *completion, struct io_buffer *iobuf ); @@ -156,9 +169,9 @@ struct ib_device_operations { /** Post send work queue entry * * @v ibdev Infiniband device - * @v iobuf I/O buffer - * @v av Address vector * @v qp Queue pair + * @v av Address vector + * @v iobuf I/O buffer * @ret rc Return status code * * If this method returns success, the I/O buffer remains @@ -167,9 +180,9 @@ struct ib_device_operations { * interpreted as "failure to enqueue buffer". */ int ( * post_send ) ( struct ib_device *ibdev, - struct io_buffer *iobuf, + struct ib_queue_pair *qp, struct ib_address_vector *av, - struct ib_queue_pair *qp ); + struct io_buffer *iobuf ); /** Poll completion queue * * @v ibdev Infiniband device @@ -187,11 +200,14 @@ struct ib_device_operations { /** An Infiniband device */ struct ib_device { - /** Driver private data */ - void *priv; + /** Device private data */ + void *dev_priv; }; +extern struct ib_queue_pair * ib_find_qp ( struct list_head *list, + unsigned long qpn ); + extern struct ll_protocol infiniband_protocol; diff --git a/src/net/infiniband.c b/src/net/infiniband.c index 52811b92..edc93b6e 100644 --- a/src/net/infiniband.c +++ b/src/net/infiniband.c @@ -33,6 +33,26 @@ * */ +/** + * Find queue pair from a list + * + * @v list List of queue pairs + * @v qpn Queue pair number + * @ret qp Queue pair, or NULL if not found + */ +struct ib_queue_pair * ib_find_qp ( struct list_head *list, + unsigned long qpn ) { + struct ib_queue_pair *qp; + + list_for_each_entry ( qp, list, list ) { + if ( qp->qpn == qpn ) + return qp; + } + return NULL; +} + + + /** Infiniband broadcast MAC address */ static uint8_t ib_broadcast[IB_ALEN] = { 0xff, }; From 31b82ea1dde3b2cd30ebe74f1cdfc41be20d69e8 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 15 Sep 2007 21:23:34 +0100 Subject: [PATCH 29/84] Hack up IB structures at start-of-day, instead of on each use. 
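The idea in this patch is to fill in the driver's Infiniband bookkeeping structures once at start-of-day (probe time) and then reuse them on every transmit and poll, instead of rebuilding them on the stack for each call. A minimal sketch of that pattern, using hypothetical names (sketch_wq, sketch_hack_up, sketch_transmit) rather than the driver's own symbols:

/* Minimal sketch of the "start-of-day" pattern; all names here are
 * hypothetical, not the driver's own.
 */
struct sketch_wq {
	unsigned int doorbell_idx;	/* constant, known at build time */
	void *wqe;			/* only known once the HCA is up */
};

static struct sketch_wq static_send_wq = {
	.doorbell_idx = 42,		/* placeholder index */
};

/* Called once from probe: fill in the run-time parts of the static
 * structure instead of rebuilding it on the stack per transmit. */
static void sketch_hack_up ( void *send_wqe_ring ) {
	static_send_wq.wqe = send_wqe_ring;
}

/* Per-packet path now just uses the pre-built structure. */
static int sketch_transmit ( int ( * post ) ( struct sketch_wq *wq,
					       void *iobuf ),
			     void *iobuf ) {
	return post ( &static_send_wq, iobuf );
}

The probe path in the diff below performs the same kind of one-time fixup on static_arbel, static_arbel_ipoib_send_wq, static_arbel_ipoib_send_cq and static_ipoib_qp.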
--- src/drivers/net/mlx_ipoib/mt25218.c | 54 +++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index be114b94..092854e8 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -38,6 +38,37 @@ struct mlx_nic { cq_t rcv_cqh; }; + +static struct io_buffer *static_ipoib_tx_ring[NUM_IPOIB_SND_WQES]; + +static struct arbel static_arbel; +static struct arbel_send_work_queue static_arbel_ipoib_send_wq = { + .doorbell_idx = IPOIB_SND_QP_DB_IDX, +}; +static struct arbel_completion_queue static_arbel_ipoib_send_cq = { + .doorbell_idx = IPOIB_SND_CQ_CI_DB_IDX, +}; + +static struct ib_device static_ibdev = { + .dev_priv = &static_arbel, +}; +static struct ib_queue_pair static_ipoib_qp = { + .send = { + .num_wqes = NUM_IPOIB_SND_WQES, + .iobufs = static_ipoib_tx_ring, + .dev_priv = &static_arbel_ipoib_send_wq, + }, + .list = LIST_HEAD_INIT ( static_ipoib_qp.list ), +}; +static struct ib_completion_queue static_ipoib_send_cq = { + .cqn = 1234, /* Only used for debug messages */ + .num_cqes = NUM_IPOIB_SND_CQES, + .dev_priv = &static_arbel_ipoib_send_cq, + .queue_pairs = LIST_HEAD_INIT ( static_ipoib_send_cq.queue_pairs ), +}; + + + /** * Open network device * @@ -110,6 +141,7 @@ static int mlx_transmit_direct ( struct net_device *netdev, struct mlx_nic *mlx = netdev->priv; int rc; +#if 0 struct arbel arbel = { .uar = memfree_pci_dev.uar, .db_rec = dev_ib_data.uar_context_base, @@ -130,6 +162,7 @@ static int mlx_transmit_direct ( struct net_device *netdev, .dev_priv = &arbel_send_queue, }, }; +#endif struct ud_av_st *bcast_av = mlx->bcast_av; struct arbelprm_ud_address_vector *bav = ( struct arbelprm_ud_address_vector * ) &bcast_av->av; @@ -143,9 +176,13 @@ static int mlx_transmit_direct ( struct net_device *netdev, }; memcpy ( &av.gid, ( ( void * ) bav ) + 16, 16 ); +#if 0 rc = arbel_post_send ( &ibdev, &qp, &av, iobuf ); next_tx_idx = qp.send.next_idx; +#endif + rc = arbel_post_send ( &static_ibdev, &static_ipoib_qp, &av, iobuf ); + return rc; } @@ -179,6 +216,7 @@ static int next_cq_idx = 0; static void mlx_poll_cq_direct ( struct net_device *netdev ) { struct mlx_nic *mlx = netdev->priv; +#if 0 struct arbel arbel = { .uar = memfree_pci_dev.uar, .db_rec = dev_ib_data.uar_context_base, @@ -218,6 +256,10 @@ static void mlx_poll_cq_direct ( struct net_device *netdev ) { arbel_poll_cq ( &ibdev, &cq, temp_complete_send, temp_complete_recv ); next_cq_idx = cq.next_idx; +#endif + + arbel_poll_cq ( &static_ibdev, &static_ipoib_send_cq, + temp_complete_send, temp_complete_recv ); } /** @@ -674,6 +716,18 @@ static int arbel_probe ( struct pci_device *pci, mac->qpn = htonl ( ib_get_qpn ( mlx->ipoib_qph ) ); memcpy ( &mac->gid, ib_data.port_gid.raw, sizeof ( mac->gid ) ); + /* Hack up IB structures */ + static_arbel.uar = memfree_pci_dev.uar; + static_arbel.db_rec = dev_ib_data.uar_context_base; + static_arbel_ipoib_send_wq.wqe = + ( ( struct udqp_st * ) qph )->snd_wq; + static_arbel_ipoib_send_cq.cqe = + ( ( struct cq_st * ) ib_data.ipoib_snd_cq )->cq_buf; + static_ipoib_qp.qpn = ib_get_qpn ( qph ); + static_ipoib_qp.priv = netdev; + list_add ( &static_ipoib_qp.list, + &static_ipoib_send_cq.queue_pairs ); + /* Register network device */ if ( ( rc = register_netdev ( netdev ) ) != 0 ) goto err_register_netdev; From 838b972cd355fbbc9572996a006f14501640ed99 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 15 Sep 2007 21:26:14 +0100 Subject: [PATCH 30/84] Kill off some dead 
code --- src/drivers/net/mlx_ipoib/mt25218.c | 83 ----------------------------- 1 file changed, 83 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 092854e8..385427fb 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -133,36 +133,11 @@ static int arbel_post_send ( struct ib_device *ibdev, struct ib_address_vector *av, struct io_buffer *iobuf ); -static struct io_buffer *tx_ring[NUM_IPOIB_SND_WQES]; -static int next_tx_idx = 0; - static int mlx_transmit_direct ( struct net_device *netdev, struct io_buffer *iobuf ) { struct mlx_nic *mlx = netdev->priv; int rc; -#if 0 - struct arbel arbel = { - .uar = memfree_pci_dev.uar, - .db_rec = dev_ib_data.uar_context_base, - }; - struct arbel_send_work_queue arbel_send_queue = { - .doorbell_idx = IPOIB_SND_QP_DB_IDX, - .wqe = ( (struct udqp_st *) mlx->ipoib_qph )->snd_wq, - }; - struct ib_device ibdev = { - .dev_priv = &arbel, - }; - struct ib_queue_pair qp = { - .qpn = ib_get_qpn ( mlx->ipoib_qph ), - .send = { - .num_wqes = NUM_IPOIB_SND_WQES, - .next_idx = next_tx_idx, - .iobufs = tx_ring, - .dev_priv = &arbel_send_queue, - }, - }; -#endif struct ud_av_st *bcast_av = mlx->bcast_av; struct arbelprm_ud_address_vector *bav = ( struct arbelprm_ud_address_vector * ) &bcast_av->av; @@ -176,14 +151,8 @@ static int mlx_transmit_direct ( struct net_device *netdev, }; memcpy ( &av.gid, ( ( void * ) bav ) + 16, 16 ); -#if 0 - rc = arbel_post_send ( &ibdev, &qp, &av, iobuf ); - - next_tx_idx = qp.send.next_idx; -#endif rc = arbel_post_send ( &static_ibdev, &static_ipoib_qp, &av, iobuf ); - return rc; } @@ -211,53 +180,9 @@ static void temp_complete_recv ( struct ib_device *ibdev __unused, DBG ( "AARGH! recv completion\n" ); } -static int next_cq_idx = 0; - static void mlx_poll_cq_direct ( struct net_device *netdev ) { struct mlx_nic *mlx = netdev->priv; -#if 0 - struct arbel arbel = { - .uar = memfree_pci_dev.uar, - .db_rec = dev_ib_data.uar_context_base, - }; - struct arbel_send_work_queue arbel_send_queue = { - .doorbell_idx = IPOIB_SND_QP_DB_IDX, - .wqe = ( ( struct udqp_st * ) mlx->ipoib_qph )->snd_wq, - }; - struct ib_device ibdev = { - .dev_priv = &arbel, - }; - struct ib_queue_pair qp = { - .qpn = ib_get_qpn ( mlx->ipoib_qph ), - .send = { - .num_wqes = NUM_IPOIB_SND_WQES, - .next_idx = next_tx_idx, - .iobufs = tx_ring, - .dev_priv = &arbel_send_queue, - }, - .priv = netdev, - }; - struct arbel_completion_queue arbel_cq = { - .doorbell_idx = IPOIB_SND_CQ_CI_DB_IDX, - .cqe = ( ( struct cq_st * ) mlx->snd_cqh )->cq_buf, - }; - struct ib_completion_queue cq = { - .cqn = 1234, - .num_cqes = NUM_IPOIB_SND_CQES, - .next_idx = next_cq_idx, - .dev_priv = &arbel_cq, - }; - - INIT_LIST_HEAD ( &cq.queue_pairs ); - INIT_LIST_HEAD ( &qp.list ); - list_add ( &qp.list, &cq.queue_pairs ); - - arbel_poll_cq ( &ibdev, &cq, temp_complete_send, temp_complete_recv ); - - next_cq_idx = cq.next_idx; -#endif - arbel_poll_cq ( &static_ibdev, &static_ipoib_send_cq, temp_complete_send, temp_complete_recv ); } @@ -366,11 +291,7 @@ static void mlx_poll ( struct net_device *netdev ) { } /* Poll completion queues */ -#if 0 - mlx_poll_cq ( netdev, mlx->snd_cqh, mlx_tx_complete ); -#else mlx_poll_cq_direct ( netdev ); -#endif mlx_poll_cq ( netdev, mlx->rcv_cqh, mlx_rx_complete ); } @@ -390,11 +311,7 @@ static void mlx_irq ( struct net_device *netdev, int enable ) { static struct net_device_operations mlx_operations = { .open = mlx_open, .close = mlx_close, -#if 0 - .transmit = 
mlx_transmit, -#else .transmit = mlx_transmit_direct, -#endif .poll = mlx_poll, .irq = mlx_irq, }; From 37fc40bc8cd857a5e922b21b9e41580b39091c76 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 15 Sep 2007 23:33:25 +0100 Subject: [PATCH 31/84] post_recv() now works, and we can pass data on the IPoIB queue pair using entirely our own code. --- src/drivers/net/mlx_ipoib/arbel.h | 27 ++- src/drivers/net/mlx_ipoib/ib_mt25218.c | 2 +- src/drivers/net/mlx_ipoib/mt25218.c | 224 ++++++++++++++++++------- src/include/gpxe/infiniband.h | 36 +++- src/net/infiniband.c | 19 ++- 5 files changed, 234 insertions(+), 74 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index f35ef26b..9da6bef9 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -25,6 +25,7 @@ struct MLX_DECLARE_STRUCT ( arbelprm_completion_queue_entry ); struct MLX_DECLARE_STRUCT ( arbelprm_completion_with_error ); struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_qp_db_record ); +struct MLX_DECLARE_STRUCT ( arbelprm_recv_wqe_segment_next ); struct MLX_DECLARE_STRUCT ( arbelprm_send_doorbell ); struct MLX_DECLARE_STRUCT ( arbelprm_ud_address_vector ); struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_ctrl_send ); @@ -37,13 +38,28 @@ struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_ud ); * */ -#define ARBELPRM_MAX_GATHER 1 +#define ARBEL_MAX_GATHER 1 struct arbelprm_ud_send_wqe { struct arbelprm_wqe_segment_next next; struct arbelprm_wqe_segment_ctrl_send ctrl; struct arbelprm_wqe_segment_ud ud; - struct arbelprm_wqe_segment_data_ptr data[ARBELPRM_MAX_GATHER]; + struct arbelprm_wqe_segment_data_ptr data[ARBEL_MAX_GATHER]; +} __attribute__ (( packed )); + +#define ARBEL_MAX_SCATTER 1 + +struct arbelprm_recv_wqe { + /* The autogenerated header is inconsistent between send and + * receive WQEs. The "ctrl" structure for receive WQEs is + * defined to include the "next" structure. Since the "ctrl" + * part of the "ctrl" structure contains only "reserved, must + * be zero" bits, we ignore its definition and provide + * something more usable. + */ + struct arbelprm_recv_wqe_segment_next next; + uint32_t ctrl[2]; /* All "reserved, must be zero" */ + struct arbelprm_wqe_segment_data_ptr data[ARBEL_MAX_SCATTER]; } __attribute__ (( packed )); union arbelprm_completion_entry { @@ -88,6 +104,7 @@ struct arbel_send_work_queue { /** An Arbel receive work queue entry */ union arbel_recv_wqe { + struct arbelprm_recv_wqe recv; uint8_t force_align[ARBEL_RECV_WQE_ALIGN]; } __attribute__ (( packed )); @@ -113,6 +130,12 @@ struct arbel { void *uar; /** Doorbell records */ union arbelprm_doorbell_record *db_rec; + /** Reserved LKey + * + * Used to get unrestricted memory access. 
+ */ + unsigned long reserved_lkey; + }; #endif /* _ARBEL_H */ diff --git a/src/drivers/net/mlx_ipoib/ib_mt25218.c b/src/drivers/net/mlx_ipoib/ib_mt25218.c index b6552f9f..45d7f46f 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt25218.c +++ b/src/drivers/net/mlx_ipoib/ib_mt25218.c @@ -1311,7 +1311,7 @@ static int create_ipoib_qp(void **qp_pp, qp->rcv_buf_sz = IPOIB_RCV_BUF_SZ; qp->max_recv_wqes = NUM_IPOIB_RCV_WQES; - qp->recv_wqe_cur_free = NUM_IPOIB_RCV_WQES; + qp->recv_wqe_cur_free = 0; //NUM_IPOIB_RCV_WQES; qp->rcv_uar_context = dev_ib_data.uar_context_base + 8 * IPOIB_RCV_QP_DB_IDX; diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 385427fb..8fdc5909 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -26,6 +26,7 @@ Skeleton NIC driver for Etherboot #include "arbel.h" +#define MLX_RX_MAX_FILL NUM_IPOIB_RCV_WQES struct mlx_nic { /** Queue pair handle */ @@ -36,35 +37,65 @@ struct mlx_nic { cq_t snd_cqh; /** Receive completion queue */ cq_t rcv_cqh; + + /** RX fill level */ + unsigned int rx_fill; }; static struct io_buffer *static_ipoib_tx_ring[NUM_IPOIB_SND_WQES]; +static struct io_buffer *static_ipoib_rx_ring[NUM_IPOIB_RCV_WQES]; static struct arbel static_arbel; static struct arbel_send_work_queue static_arbel_ipoib_send_wq = { .doorbell_idx = IPOIB_SND_QP_DB_IDX, }; +static struct arbel_send_work_queue static_arbel_ipoib_recv_wq = { + .doorbell_idx = IPOIB_RCV_QP_DB_IDX, +}; static struct arbel_completion_queue static_arbel_ipoib_send_cq = { .doorbell_idx = IPOIB_SND_CQ_CI_DB_IDX, }; +static struct arbel_completion_queue static_arbel_ipoib_recv_cq = { + .doorbell_idx = IPOIB_RCV_CQ_CI_DB_IDX, +}; +static struct ib_completion_queue static_ipoib_send_cq; +static struct ib_completion_queue static_ipoib_recv_cq; static struct ib_device static_ibdev = { .dev_priv = &static_arbel, }; static struct ib_queue_pair static_ipoib_qp = { .send = { + .qp = &static_ipoib_qp, + .is_send = 1, + .cq = &static_ipoib_send_cq, .num_wqes = NUM_IPOIB_SND_WQES, .iobufs = static_ipoib_tx_ring, .dev_priv = &static_arbel_ipoib_send_wq, + .list = LIST_HEAD_INIT ( static_ipoib_qp.send.list ), + }, + .recv = { + .qp = &static_ipoib_qp, + .is_send = 0, + .cq = &static_ipoib_recv_cq, + .num_wqes = NUM_IPOIB_RCV_WQES, + .iobufs = static_ipoib_rx_ring, + .dev_priv = &static_arbel_ipoib_recv_wq, + .list = LIST_HEAD_INIT ( static_ipoib_qp.recv.list ), }, - .list = LIST_HEAD_INIT ( static_ipoib_qp.list ), }; static struct ib_completion_queue static_ipoib_send_cq = { .cqn = 1234, /* Only used for debug messages */ .num_cqes = NUM_IPOIB_SND_CQES, .dev_priv = &static_arbel_ipoib_send_cq, - .queue_pairs = LIST_HEAD_INIT ( static_ipoib_send_cq.queue_pairs ), + .work_queues = LIST_HEAD_INIT ( static_ipoib_send_cq.work_queues ), +}; +static struct ib_completion_queue static_ipoib_recv_cq = { + .cqn = 2345, /* Only used for debug messages */ + .num_cqes = NUM_IPOIB_RCV_CQES, + .dev_priv = &static_arbel_ipoib_recv_cq, + .work_queues = LIST_HEAD_INIT ( static_ipoib_recv_cq.work_queues ), }; @@ -157,36 +188,6 @@ static int mlx_transmit_direct ( struct net_device *netdev, } -static void arbel_poll_cq ( struct ib_device *ibdev, - struct ib_completion_queue *cq, - ib_completer_t complete_send, - ib_completer_t complete_recv ); - -static void temp_complete_send ( struct ib_device *ibdev __unused, - struct ib_queue_pair *qp, - struct ib_completion *completion, - struct io_buffer *iobuf ) { - struct net_device *netdev = qp->priv; - - DBG ( "Wahey! 
TX completion\n" ); - netdev_tx_complete_err ( netdev, iobuf, - ( completion->syndrome ? -EIO : 0 ) ); -} - -static void temp_complete_recv ( struct ib_device *ibdev __unused, - struct ib_queue_pair *qp __unused, - struct ib_completion *completion __unused, - struct io_buffer *iobuf __unused ) { - DBG ( "AARGH! recv completion\n" ); -} - -static void mlx_poll_cq_direct ( struct net_device *netdev ) { - struct mlx_nic *mlx = netdev->priv; - - arbel_poll_cq ( &static_ibdev, &static_ipoib_send_cq, - temp_complete_send, temp_complete_recv ); -} - /** * Handle TX completion * @@ -233,6 +234,44 @@ static void mlx_rx_complete ( struct net_device *netdev, netdev_rx ( netdev, iobuf ); } +static void arbel_poll_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq, + ib_completer_t complete_send, + ib_completer_t complete_recv ); + +static void temp_complete_send ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->priv; + + DBG ( "Wahey! TX completion\n" ); + netdev_tx_complete_err ( netdev, iobuf, + ( completion->syndrome ? -EIO : 0 ) ); +} + +static void temp_complete_recv ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->priv; + struct mlx_nic *mlx = netdev->priv; + + DBG ( "Yay! RX completion on %p len %zx:\n", iobuf, completion->len ); + // DBG_HD ( iobuf, sizeof ( *iobuf ) ); + // DBG_HD ( iobuf->data, 256 ); + if ( completion->syndrome ) { + netdev_rx_err ( netdev, iobuf, -EIO ); + } else { + iob_put ( iobuf, completion->len ); + iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) ); + netdev_rx ( netdev, iobuf ); + } + + mlx->rx_fill--; +} + +#if 0 /** * Poll completion queue * @@ -267,6 +306,32 @@ static void mlx_poll_cq ( struct net_device *netdev, cq_t cq, free_wqe ( ib_cqe.wqe ); } } +#endif + +static int arbel_post_recv ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct io_buffer *iobuf ); + +static void mlx_refill_rx ( struct net_device *netdev ) { + struct mlx_nic *mlx = netdev->priv; + struct io_buffer *iobuf; + int rc; + + while ( mlx->rx_fill < MLX_RX_MAX_FILL ) { + iobuf = alloc_iob ( 2048 ); + if ( ! 
iobuf ) + break; + DBG ( "Posting RX buffer %p:\n", iobuf ); + // memset ( iobuf->data, 0xaa, 256 ); + // DBG_HD ( iobuf, sizeof ( *iobuf ) ); + if ( ( rc = arbel_post_recv ( &static_ibdev, &static_ipoib_qp, + iobuf ) ) != 0 ) { + free_iob ( iobuf ); + break; + } + mlx->rx_fill++; + } +} /** * Poll for completed and received packets @@ -291,8 +356,13 @@ static void mlx_poll ( struct net_device *netdev ) { } /* Poll completion queues */ - mlx_poll_cq_direct ( netdev ); - mlx_poll_cq ( netdev, mlx->rcv_cqh, mlx_rx_complete ); + arbel_poll_cq ( &static_ibdev, &static_ipoib_send_cq, + temp_complete_send, temp_complete_recv ); + arbel_poll_cq ( &static_ibdev, &static_ipoib_recv_cq, + temp_complete_send, temp_complete_recv ); + // mlx_poll_cq ( netdev, mlx->rcv_cqh, mlx_rx_complete ); + + mlx_refill_rx ( netdev ); } /** @@ -397,12 +467,9 @@ static int arbel_post_send ( struct ib_device *ibdev, memcpy ( &wqe->ud.u.dwords[4], gid, sizeof ( *gid ) ); MLX_FILL_1 ( &wqe->ud, 8, destination_qp, av->dest_qp ); MLX_FILL_1 ( &wqe->ud, 9, q_key, av->qkey ); + MLX_FILL_1 ( &wqe->data[0], 0, byte_count, iob_len ( iobuf ) ); MLX_FILL_1 ( &wqe->data[0], 3, local_address_l, virt_to_bus ( iobuf->data ) ); - MLX_FILL_1 ( &wqe->data[0], 0, byte_count, iob_len ( iobuf ) ); - - DBG ( "Work queue entry:\n" ); - DBG_HD ( wqe, sizeof ( *wqe ) ); /* Update previous work queue entry's "next" field */ nds = ( ( offsetof ( typeof ( *wqe ), data ) + @@ -413,16 +480,11 @@ static int arbel_post_send ( struct ib_device *ibdev, f, 1, always1, 1 ); - DBG ( "Previous work queue entry's next field:\n" ); - DBG_HD ( &prev_wqe->next, sizeof ( prev_wqe->next ) ); - /* Update doorbell record */ + barrier(); db_rec = &arbel->db_rec[arbel_send_wq->doorbell_idx]; MLX_FILL_1 ( &db_rec->qp, 0, counter, ( ( wq->next_idx + 1 ) & 0xffff ) ); - barrier(); - DBG ( "Doorbell record:\n" ); - DBG_HD ( db_rec, 8 ); /* Ring doorbell register */ MLX_FILL_4 ( &db_reg.send, 0, @@ -441,6 +503,51 @@ static int arbel_post_send ( struct ib_device *ibdev, return 0; } +/** + * Post receive work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static int arbel_post_recv ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct io_buffer *iobuf ) { + struct arbel *arbel = ibdev->dev_priv; + struct ib_work_queue *wq = &qp->recv; + struct arbel_recv_work_queue *arbel_recv_wq = wq->dev_priv; + struct arbelprm_recv_wqe *wqe; + union arbelprm_doorbell_record *db_rec; + unsigned int wqe_idx_mask; + + /* Allocate work queue entry */ + wqe_idx_mask = ( wq->num_wqes - 1 ); + if ( wq->iobufs[wq->next_idx & wqe_idx_mask] ) { + DBGC ( arbel, "Arbel %p receive queue full", arbel ); + return -ENOBUFS; + } + wq->iobufs[wq->next_idx & wqe_idx_mask] = iobuf; + wqe = &arbel_recv_wq->wqe[wq->next_idx & wqe_idx_mask].recv; + + /* Construct work queue entry */ + MLX_FILL_1 ( &wqe->data[0], 0, byte_count, iob_tailroom ( iobuf ) ); + MLX_FILL_1 ( &wqe->data[0], 1, l_key, arbel->reserved_lkey ); + MLX_FILL_1 ( &wqe->data[0], 3, + local_address_l, virt_to_bus ( iobuf->data ) ); + + /* Update doorbell record */ + barrier(); + db_rec = &arbel->db_rec[arbel_recv_wq->doorbell_idx]; + MLX_FILL_1 ( &db_rec->qp, 0, + counter, ( ( wq->next_idx + 1 ) & 0xffff ) ); + + /* Update work queue's index */ + wq->next_idx++; + + return 0; +} + /** * Handle completion * @@ -458,7 +565,6 @@ static int arbel_complete ( struct ib_device *ibdev, ib_completer_t complete_recv ) { struct arbel *arbel = ibdev->dev_priv; 
struct ib_completion completion; - struct ib_queue_pair *qp; struct ib_work_queue *wq; struct io_buffer *iobuf; struct arbel_send_work_queue *arbel_send_wq; @@ -466,7 +572,7 @@ static int arbel_complete ( struct ib_device *ibdev, ib_completer_t complete; unsigned int opcode; unsigned long qpn; - unsigned int is_send; + int is_send; unsigned long wqe_adr; unsigned int wqe_idx; int rc = 0; @@ -489,22 +595,20 @@ static int arbel_complete ( struct ib_device *ibdev, /* Don't return immediately; propagate error to completer */ } - /* Identify queue pair */ - qp = ib_find_qp ( &cq->queue_pairs, qpn ); - if ( ! qp ) { - DBGC ( arbel, "Arbel %p CQN %lx unknown QPN %lx\n", - arbel, cq->cqn, qpn ); + /* Identify work queue */ + wq = ib_find_wq ( cq, qpn, is_send ); + if ( ! wq ) { + DBGC ( arbel, "Arbel %p CQN %lx unknown %s QPN %lx\n", + arbel, cq->cqn, ( is_send ? "send" : "recv" ), qpn ); return -EIO; } /* Identify work queue entry index */ if ( is_send ) { - wq = &qp->send; arbel_send_wq = wq->dev_priv; wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_send_wq->wqe ) ) / sizeof ( arbel_send_wq->wqe[0] ) ); } else { - wq = &qp->recv; arbel_recv_wq = wq->dev_priv; wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_recv_wq->wqe ) ) / sizeof ( arbel_recv_wq->wqe[0] ) ); @@ -521,7 +625,7 @@ static int arbel_complete ( struct ib_device *ibdev, /* Pass off to caller's completion handler */ complete = ( is_send ? complete_send : complete_recv ); - complete ( ibdev, qp, &completion, iobuf ); + complete ( ibdev, wq->qp, &completion, iobuf ); return rc; } @@ -577,6 +681,7 @@ static void arbel_poll_cq ( struct ib_device *ibdev, /** Arbel Infiniband operations */ static struct ib_device_operations arbel_ib_operations = { .post_send = arbel_post_send, + .post_recv = arbel_post_recv, .poll_cq = arbel_poll_cq, }; @@ -636,14 +741,21 @@ static int arbel_probe ( struct pci_device *pci, /* Hack up IB structures */ static_arbel.uar = memfree_pci_dev.uar; static_arbel.db_rec = dev_ib_data.uar_context_base; + static_arbel.reserved_lkey = dev_ib_data.mkey; static_arbel_ipoib_send_wq.wqe = ( ( struct udqp_st * ) qph )->snd_wq; + static_arbel_ipoib_recv_wq.wqe = + ( ( struct udqp_st * ) qph )->rcv_wq; static_arbel_ipoib_send_cq.cqe = ( ( struct cq_st * ) ib_data.ipoib_snd_cq )->cq_buf; + static_arbel_ipoib_recv_cq.cqe = + ( ( struct cq_st * ) ib_data.ipoib_rcv_cq )->cq_buf; static_ipoib_qp.qpn = ib_get_qpn ( qph ); static_ipoib_qp.priv = netdev; - list_add ( &static_ipoib_qp.list, - &static_ipoib_send_cq.queue_pairs ); + list_add ( &static_ipoib_qp.send.list, + &static_ipoib_send_cq.work_queues ); + list_add ( &static_ipoib_qp.recv.list, + &static_ipoib_recv_cq.work_queues ); /* Register network device */ if ( ( rc = register_netdev ( netdev ) ) != 0 ) diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 3679a110..85684b63 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -64,9 +64,19 @@ struct ibhdr { struct ib_device; +struct ib_queue_pair; +struct ib_completion_queue; /** An Infiniband Work Queue */ struct ib_work_queue { + /** Containing queue pair */ + struct ib_queue_pair *qp; + /** "Is a send queue" flag */ + int is_send; + /** Associated completion queue */ + struct ib_completion_queue *cq; + /** List of work queues on this completion queue */ + struct list_head list; /** Number of work queue entries */ unsigned int num_wqes; /** Next work queue entry index @@ -85,8 +95,6 @@ struct ib_work_queue { /** An Infiniband Queue Pair */ struct ib_queue_pair { - /** List of 
queue pairs sharing a completion queue */ - struct list_head list; /** Queue Pair Number */ unsigned long qpn; /** Send queue */ @@ -113,8 +121,8 @@ struct ib_completion_queue { * array index. */ unsigned long next_idx; - /** List of associated queue pairs */ - struct list_head queue_pairs; + /** List of work queues completing to this queue */ + struct list_head work_queues; /** Device private data */ void *dev_priv; }; @@ -183,6 +191,22 @@ struct ib_device_operations { struct ib_queue_pair *qp, struct ib_address_vector *av, struct io_buffer *iobuf ); + /** + * Post receive work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v iobuf I/O buffer + * @ret rc Return status code + * + * If this method returns success, the I/O buffer remains + * owned by the queue pair. If this method returns failure, + * the I/O buffer is immediately released; the failure is + * interpreted as "failure to enqueue buffer". + */ + int ( * post_recv ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct io_buffer *iobuf ); /** Poll completion queue * * @v ibdev Infiniband device @@ -205,8 +229,8 @@ struct ib_device { }; -extern struct ib_queue_pair * ib_find_qp ( struct list_head *list, - unsigned long qpn ); +extern struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, + unsigned long qpn, int is_send ); diff --git a/src/net/infiniband.c b/src/net/infiniband.c index edc93b6e..694c88b1 100644 --- a/src/net/infiniband.c +++ b/src/net/infiniband.c @@ -34,19 +34,20 @@ */ /** - * Find queue pair from a list + * Find work queue belonging to completion queue * - * @v list List of queue pairs + * @v cq Completion queue * @v qpn Queue pair number - * @ret qp Queue pair, or NULL if not found + * @v is_send Find send work queue (rather than receive) + * @ret wq Work queue, or NULL if not found */ -struct ib_queue_pair * ib_find_qp ( struct list_head *list, - unsigned long qpn ) { - struct ib_queue_pair *qp; +struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, + unsigned long qpn, int is_send ) { + struct ib_work_queue *wq; - list_for_each_entry ( qp, list, list ) { - if ( qp->qpn == qpn ) - return qp; + list_for_each_entry ( wq, &cq->work_queues, list ) { + if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) ) + return wq; } return NULL; } From baa885ee8e64faf40486618695024203a57e91f7 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sat, 15 Sep 2007 23:36:41 +0100 Subject: [PATCH 32/84] Kill off more dead code. --- src/drivers/net/mlx_ipoib/mt25218.c | 123 ---------------------------- 1 file changed, 123 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 8fdc5909..2e147a0e 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -124,41 +124,6 @@ static void mlx_close ( struct net_device *netdev ) { } -#warning "Broadcast address?" -static uint8_t ib_broadcast[IB_ALEN] = { 0xff, }; - - -/** - * Transmit packet - * - * @v netdev Network device - * @v iobuf I/O buffer - * @ret rc Return status code - */ -static int mlx_transmit ( struct net_device *netdev, - struct io_buffer *iobuf ) { - struct mlx_nic *mlx = netdev->priv; - ud_send_wqe_t snd_wqe; - int rc; - - snd_wqe = alloc_send_wqe ( mlx->ipoib_qph ); - if ( ! 
snd_wqe ) { - DBGC ( mlx, "MLX %p out of TX WQEs\n", mlx ); - return -ENOBUFS; - } - - prep_send_wqe_buf ( mlx->ipoib_qph, mlx->bcast_av, snd_wqe, - iobuf->data, 0, iob_len ( iobuf ), 0 ); - if ( ( rc = post_send_req ( mlx->ipoib_qph, snd_wqe, 1 ) ) != 0 ) { - DBGC ( mlx, "MLX %p could not post TX WQE %p: %s\n", - mlx, snd_wqe, strerror ( rc ) ); - free_wqe ( snd_wqe ); - return rc; - } - - return 0; -} - static int arbel_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp, struct ib_address_vector *av, @@ -187,53 +152,6 @@ static int mlx_transmit_direct ( struct net_device *netdev, return rc; } - -/** - * Handle TX completion - * - * @v netdev Network device - * @v ib_cqe Completion queue entry - */ -static void mlx_tx_complete ( struct net_device *netdev, - struct ib_cqe_st *ib_cqe ) { - netdev_tx_complete_next_err ( netdev, - ( ib_cqe->is_error ? -EIO : 0 ) ); -} - -/** - * Handle RX completion - * - * @v netdev Network device - * @v ib_cqe Completion queue entry - */ -static void mlx_rx_complete ( struct net_device *netdev, - struct ib_cqe_st *ib_cqe ) { - unsigned int len; - struct io_buffer *iobuf; - void *buf; - - /* Check for errors */ - if ( ib_cqe->is_error ) { - netdev_rx_err ( netdev, NULL, -EIO ); - return; - } - - /* Allocate I/O buffer */ - len = ( ib_cqe->count - GRH_SIZE ); - iobuf = alloc_iob ( len ); - if ( ! iobuf ) { - netdev_rx_err ( netdev, NULL, -ENOMEM ); - return; - } - - /* Fill I/O buffer */ - buf = get_rcv_wqe_buf ( ib_cqe->wqe, 1 ); - memcpy ( iob_put ( iobuf, len ), buf, len ); - - /* Hand off to network stack */ - netdev_rx ( netdev, iobuf ); -} - static void arbel_poll_cq ( struct ib_device *ibdev, struct ib_completion_queue *cq, ib_completer_t complete_send, @@ -258,8 +176,6 @@ static void temp_complete_recv ( struct ib_device *ibdev __unused, struct mlx_nic *mlx = netdev->priv; DBG ( "Yay! RX completion on %p len %zx:\n", iobuf, completion->len ); - // DBG_HD ( iobuf, sizeof ( *iobuf ) ); - // DBG_HD ( iobuf->data, 256 ); if ( completion->syndrome ) { netdev_rx_err ( netdev, iobuf, -EIO ); } else { @@ -271,43 +187,6 @@ static void temp_complete_recv ( struct ib_device *ibdev __unused, mlx->rx_fill--; } -#if 0 -/** - * Poll completion queue - * - * @v netdev Network device - * @v cq Completion queue - * @v handler Completion handler - */ -static void mlx_poll_cq ( struct net_device *netdev, cq_t cq, - void ( * handler ) ( struct net_device *netdev, - struct ib_cqe_st *ib_cqe ) ) { - struct mlx_nic *mlx = netdev->priv; - struct ib_cqe_st ib_cqe; - uint8_t num_cqes; - - while ( 1 ) { - - /* Poll for single completion queue entry */ - ib_poll_cq ( cq, &ib_cqe, &num_cqes ); - - /* Return if no entries in the queue */ - if ( ! num_cqes ) - return; - - DBGC ( mlx, "MLX %p cpl in %p: err %x send %x " - "wqe %p count %lx\n", mlx, cq, ib_cqe.is_error, - ib_cqe.is_send, ib_cqe.wqe, ib_cqe.count ); - - /* Handle TX/RX completion */ - handler ( netdev, &ib_cqe ); - - /* Free associated work queue entry */ - free_wqe ( ib_cqe.wqe ); - } -} -#endif - static int arbel_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp, struct io_buffer *iobuf ); @@ -322,8 +201,6 @@ static void mlx_refill_rx ( struct net_device *netdev ) { if ( ! 
iobuf ) break; DBG ( "Posting RX buffer %p:\n", iobuf ); - // memset ( iobuf->data, 0xaa, 256 ); - // DBG_HD ( iobuf, sizeof ( *iobuf ) ); if ( ( rc = arbel_post_recv ( &static_ibdev, &static_ipoib_qp, iobuf ) ) != 0 ) { free_iob ( iobuf ); From bf9bd938565f4fa99a53c4820f1aea4d9cdb6bbb Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 16 Sep 2007 01:44:57 +0100 Subject: [PATCH 33/84] First (working) draft of command interface. --- src/drivers/net/mlx_ipoib/arbel.h | 27 ++++++ src/drivers/net/mlx_ipoib/mt25218.c | 136 +++++++++++++++++++++++++++- 2 files changed, 159 insertions(+), 4 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index 9da6bef9..79d01b2b 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -16,14 +16,34 @@ #define ARBEL_OPCODE_RECV_ERROR 0xfe #define ARBEL_OPCODE_SEND_ERROR 0xff +/* + * HCA commands + * + */ + +#define ARBEL_HCR_BASE 0x80680 +#define ARBEL_HCR_REG(x) ( ARBEL_HCR_BASE + 4 * (x) ) +#define ARBEL_HCR_MAX_WAIT_MS 2000 + +#define ARBEL_HCR_OPCODE_MASK 0x0000ffffUL +#define ARBEL_HCR_IN_IMMEDIATE 0x00010000UL +#define ARBEL_HCR_IN_MAILBOX 0x00020000UL +#define ARBEL_HCR_OUT_IMMEDIATE 0x00040000UL +#define ARBEL_HCR_OUT_MAILBOX 0x00080000UL + +#define ARBEL_HCR_OP_SW2HW_CQ ( 0x0016 | ARBEL_HCR_IN_MAILBOX ) +#define ARBEL_HCR_OP_NOP ( 0x0031 ) + /* * Wrapper structures for hardware datatypes * */ +struct MLX_DECLARE_STRUCT ( arbelprm_completion_queue_context ); struct MLX_DECLARE_STRUCT ( arbelprm_completion_queue_entry ); struct MLX_DECLARE_STRUCT ( arbelprm_completion_with_error ); struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record ); +struct MLX_DECLARE_STRUCT ( arbelprm_hca_command_register ); struct MLX_DECLARE_STRUCT ( arbelprm_qp_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_recv_wqe_segment_next ); struct MLX_DECLARE_STRUCT ( arbelprm_send_doorbell ); @@ -126,6 +146,13 @@ struct arbel_completion_queue { /** An Arbel device */ struct arbel { + /** Configuration registers */ + void *config; + /** Command input mailbox */ + void *mailbox_in; + /** Command output mailbox */ + void *mailbox_out; + /** User Access Region */ void *uar; /** Doorbell records */ diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 2e147a0e..3625991c 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -26,6 +26,11 @@ Skeleton NIC driver for Etherboot #include "arbel.h" +static const struct ib_gid arbel_no_gid = { + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } +}; + + #define MLX_RX_MAX_FILL NUM_IPOIB_RCV_WQES struct mlx_nic { @@ -263,11 +268,121 @@ static struct net_device_operations mlx_operations = { .irq = mlx_irq, }; +/** + * Wait for Arbel command completion + * + * @v arbel Arbel device + * @ret rc Return status code + */ +static int arbel_command_wait ( struct arbel *arbel, + struct arbelprm_hca_command_register *hcr ) { + unsigned int wait; + + for ( wait = ARBEL_HCR_MAX_WAIT_MS ; wait ; wait-- ) { + hcr->u.dwords[6] = + readl ( arbel->config + ARBEL_HCR_REG ( 6 ) ); + if ( MLX_GET ( hcr, go ) == 0 ) + return 0; + mdelay ( 1 ); + } + return -EBUSY; +} + +/** + * Issue HCA command + * + * @v arbel Arbel device + * @v op_fl Opcode (plus implied flags) + * @v op_mod Opcode modifier (0 if no modifier applicable) + * @v in_param Input parameter + * @v in_param_len Input parameter length + * @v in_mod Input modifier (0 if no modifier applicable) + * @v out_param Output parameter + * @ret rc Return 
status code + */ +static int arbel_command ( struct arbel *arbel, unsigned int op_fl, + unsigned int op_mod, const void *in_param, + size_t in_param_len, unsigned int in_mod, + void *out_param, size_t out_param_len ) { + struct arbelprm_hca_command_register hcr; + unsigned int status; + unsigned int i; + int rc; + + /* Check that HCR is free */ + if ( ( rc = arbel_command_wait ( arbel, &hcr ) ) != 0 ) { + DBGC ( arbel, "Arbel %p command interface locked\n", arbel ); + return rc; + } + + /* Prepare HCR */ + memset ( &hcr, 0, sizeof ( hcr ) ); + if ( op_fl & ARBEL_HCR_IN_IMMEDIATE ) { + memcpy ( &hcr.u.dwords[0], in_param, 8 ); + } else if ( op_fl & ARBEL_HCR_IN_MAILBOX ) { + memcpy ( arbel->mailbox_in, in_param, in_param_len ); + MLX_FILL_1 ( &hcr, 1, in_param_l, + virt_to_bus ( arbel->mailbox_in ) ); + } + MLX_FILL_1 ( &hcr, 2, input_modifier, in_mod ); + if ( op_fl & ARBEL_HCR_OUT_MAILBOX ) { + MLX_FILL_1 ( &hcr, 4, out_param_l, + virt_to_bus ( arbel->mailbox_out ) ); + } + MLX_FILL_3 ( &hcr, 6, + opcode, ( op_fl & ARBEL_HCR_OPCODE_MASK ), + opcode_modifier, op_mod, + go, 1 ); + + /* Issue command */ + for ( i = 0 ; i < ( sizeof ( hcr ) / sizeof ( hcr.u.dwords[0] ) ) ; + i++ ) { + writel ( hcr.u.dwords[i], + arbel->config + ARBEL_HCR_REG ( i ) ); + barrier(); + } + + /* Wait for command completion */ + if ( ( rc = arbel_command_wait ( arbel, &hcr ) ) != 0 ) { + DBGC ( arbel, "Arbel %p timed out waiting for command:\n", + arbel ); + DBGC_HD ( arbel, &hcr, sizeof ( hcr ) ); + return rc; + } + + /* Check command status */ + status = MLX_GET ( &hcr, status ); + if ( status != 0 ) { + DBGC ( arbel, "Arbel %p command failed with status %02x:\n", + arbel, status ); + DBGC_HD ( arbel, &hcr, sizeof ( hcr ) ); + return -EIO; + } + + /* Read output parameters, if any */ + hcr.u.dwords[3] = readl ( arbel->config + ARBEL_HCR_REG ( 3 ) ); + hcr.u.dwords[4] = readl ( arbel->config + ARBEL_HCR_REG ( 4 ) ); + if ( op_fl & ARBEL_HCR_OUT_IMMEDIATE ) { + memcpy ( out_param, &hcr.u.dwords[3], 8 ); + } else if ( op_fl & ARBEL_HCR_OUT_MAILBOX ) { + memcpy ( out_param, arbel->mailbox_out, out_param_len ); + } + + return 0; +} + +/** + * Create completion queue + * + * @v ibdev Infiniband device + * @v + */ +static int arbel_create_cq ( struct ib_device *ibdev ) { + struct arbelprm_completion_queue_context *cqctx; -static struct ib_gid arbel_no_gid = { - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } -}; +} + /** * Ring doorbell register in UAR @@ -310,7 +425,7 @@ static int arbel_post_send ( struct ib_device *ibdev, struct arbelprm_ud_send_wqe *wqe; union arbelprm_doorbell_record *db_rec; union arbelprm_doorbell_register db_reg; - struct ib_gid *gid; + const struct ib_gid *gid; unsigned int wqe_idx_mask; size_t nds; @@ -616,6 +731,9 @@ static int arbel_probe ( struct pci_device *pci, memcpy ( &mac->gid, ib_data.port_gid.raw, sizeof ( mac->gid ) ); /* Hack up IB structures */ + static_arbel.config = memfree_pci_dev.cr_space; + static_arbel.mailbox_in = dev_buffers_p->inprm_buf; + static_arbel.mailbox_out = dev_buffers_p->outprm_buf; static_arbel.uar = memfree_pci_dev.uar; static_arbel.db_rec = dev_ib_data.uar_context_base; static_arbel.reserved_lkey = dev_ib_data.mkey; @@ -634,6 +752,16 @@ static int arbel_probe ( struct pci_device *pci, list_add ( &static_ipoib_qp.recv.list, &static_ipoib_recv_cq.work_queues ); + uint8_t buf[512]; + memset ( buf, 0xaa, sizeof ( buf ) ); + if ( ( rc = arbel_command ( &static_arbel, + ( 0x03 | ARBEL_HCR_OUT_MAILBOX ), 0, + NULL, 0, 0, buf, 256 ) ) != 0 ) { + DBG ( 
"QUERY_DEV_LIM failed: %s\n", strerror ( rc ) ); + } + DBG ( "Device limits:\n "); + DBG_HD ( &buf[0], sizeof ( buf ) ); + /* Register network device */ if ( ( rc = register_netdev ( netdev ) ) != 0 ) goto err_register_netdev; From 791f992657d662f4e9645fcdcc9a39dc6d2a2359 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 16 Sep 2007 03:13:25 +0100 Subject: [PATCH 34/84] Command interface now reasonably friendly. --- src/drivers/net/mlx_ipoib/arbel.h | 61 ++++++++++++++------- src/drivers/net/mlx_ipoib/mt25218.c | 83 ++++++++++++++++++----------- 2 files changed, 94 insertions(+), 50 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index 79d01b2b..1cf92bde 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -16,24 +16,6 @@ #define ARBEL_OPCODE_RECV_ERROR 0xfe #define ARBEL_OPCODE_SEND_ERROR 0xff -/* - * HCA commands - * - */ - -#define ARBEL_HCR_BASE 0x80680 -#define ARBEL_HCR_REG(x) ( ARBEL_HCR_BASE + 4 * (x) ) -#define ARBEL_HCR_MAX_WAIT_MS 2000 - -#define ARBEL_HCR_OPCODE_MASK 0x0000ffffUL -#define ARBEL_HCR_IN_IMMEDIATE 0x00010000UL -#define ARBEL_HCR_IN_MAILBOX 0x00020000UL -#define ARBEL_HCR_OUT_IMMEDIATE 0x00040000UL -#define ARBEL_HCR_OUT_MAILBOX 0x00080000UL - -#define ARBEL_HCR_OP_SW2HW_CQ ( 0x0016 | ARBEL_HCR_IN_MAILBOX ) -#define ARBEL_HCR_OP_NOP ( 0x0031 ) - /* * Wrapper structures for hardware datatypes * @@ -45,6 +27,7 @@ struct MLX_DECLARE_STRUCT ( arbelprm_completion_with_error ); struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_hca_command_register ); struct MLX_DECLARE_STRUCT ( arbelprm_qp_db_record ); +struct MLX_DECLARE_STRUCT ( arbelprm_query_dev_lim ); struct MLX_DECLARE_STRUCT ( arbelprm_recv_wqe_segment_next ); struct MLX_DECLARE_STRUCT ( arbelprm_send_doorbell ); struct MLX_DECLARE_STRUCT ( arbelprm_ud_address_vector ); @@ -165,4 +148,46 @@ struct arbel { }; +/* + * HCA commands + * + */ + +#define ARBEL_HCR_QUERY_DEV_LIM 0x0003 + +#define ARBEL_HCR_BASE 0x80680 +#define ARBEL_HCR_REG(x) ( ARBEL_HCR_BASE + 4 * (x) ) +#define ARBEL_HCR_MAX_WAIT_MS 2000 + +/* HCA command is split into + * + * bits 11:0 Opcode + * bit 12 Input uses mailbox + * bit 13 Output uses mailbox + * bits 22:14 Input parameter length (in dwords) + * bits 31:23 Output parameter length (in dwords) + * + * Encoding the information in this way allows us to cut out several + * parameters to the arbel_command() call. + */ +#define ARBEL_HCR_IN_MBOX 0x00001000UL +#define ARBEL_HCR_OUT_MBOX 0x00002000UL +#define ARBEL_HCR_OPCODE( _command ) ( (_command) & 0xfff ) +#define ARBEL_HCR_IN_LEN( _command ) ( ( (_command) >> 12 ) & 0x7fc ) +#define ARBEL_HCR_OUT_LEN( _command ) ( ( (_command) >> 21 ) & 0x7fc ) + +/** Build HCR command from component parts */ +#define ARBEL_HCR_CMD( _opcode, _in_mbox, _in_len, _out_mbox, _out_len ) \ + ( (_opcode) | \ + ( (_in_mbox) ? ARBEL_HCR_IN_MBOX : 0 ) | \ + ( ( (_in_len) / 4 ) << 14 ) | \ + ( (_out_mbox) ? 
ARBEL_HCR_OUT_MBOX : 0 ) | \ + ( ( (_out_len) / 4 ) << 23 ) ) + +#define ARBEL_HCR_IN_CMD( _opcode, _in_mbox, _in_len ) \ + ARBEL_HCR_CMD ( _opcode, _in_mbox, _in_len, 0, 0 ) + +#define ARBEL_HCR_OUT_CMD( _opcode, _out_mbox, _out_len ) \ + ARBEL_HCR_CMD ( _opcode, 0, 0, _out_mbox, _out_len ) + #endif /* _ARBEL_H */ diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 3625991c..180ec6da 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -268,14 +268,21 @@ static struct net_device_operations mlx_operations = { .irq = mlx_irq, }; +/*************************************************************************** + * + * HCA commands + * + *************************************************************************** + */ + /** * Wait for Arbel command completion * * @v arbel Arbel device * @ret rc Return status code */ -static int arbel_command_wait ( struct arbel *arbel, - struct arbelprm_hca_command_register *hcr ) { +static int arbel_cmd_wait ( struct arbel *arbel, + struct arbelprm_hca_command_register *hcr ) { unsigned int wait; for ( wait = ARBEL_HCR_MAX_WAIT_MS ; wait ; wait-- ) { @@ -292,45 +299,54 @@ static int arbel_command_wait ( struct arbel *arbel, * Issue HCA command * * @v arbel Arbel device - * @v op_fl Opcode (plus implied flags) + * @v command Command opcode, flags and input/output lengths * @v op_mod Opcode modifier (0 if no modifier applicable) - * @v in_param Input parameter - * @v in_param_len Input parameter length + * @v in Input parameters * @v in_mod Input modifier (0 if no modifier applicable) - * @v out_param Output parameter + * @v out Output parameters * @ret rc Return status code */ -static int arbel_command ( struct arbel *arbel, unsigned int op_fl, - unsigned int op_mod, const void *in_param, - size_t in_param_len, unsigned int in_mod, - void *out_param, size_t out_param_len ) { +static int arbel_cmd ( struct arbel *arbel, unsigned long command, + unsigned int op_mod, const void *in, + unsigned int in_mod, void *out ) { struct arbelprm_hca_command_register hcr; + unsigned int opcode = ARBEL_HCR_OPCODE ( command ); + size_t in_len = ARBEL_HCR_IN_LEN ( command ); + size_t out_len = ARBEL_HCR_OUT_LEN ( command ); + void *in_buffer; + void *out_buffer; unsigned int status; unsigned int i; int rc; + DBGC ( arbel, "Arbel %p command %02x in %zx%s out %zx%s\n", + arbel, opcode, in_len, + ( ( command & ARBEL_HCR_IN_MBOX ) ? "(mbox)" : "" ), out_len, + ( ( command & ARBEL_HCR_OUT_MBOX ) ? 
"(mbox)" : "" ) ); + /* Check that HCR is free */ - if ( ( rc = arbel_command_wait ( arbel, &hcr ) ) != 0 ) { + if ( ( rc = arbel_cmd_wait ( arbel, &hcr ) ) != 0 ) { DBGC ( arbel, "Arbel %p command interface locked\n", arbel ); return rc; } /* Prepare HCR */ memset ( &hcr, 0, sizeof ( hcr ) ); - if ( op_fl & ARBEL_HCR_IN_IMMEDIATE ) { - memcpy ( &hcr.u.dwords[0], in_param, 8 ); - } else if ( op_fl & ARBEL_HCR_IN_MAILBOX ) { - memcpy ( arbel->mailbox_in, in_param, in_param_len ); - MLX_FILL_1 ( &hcr, 1, in_param_l, - virt_to_bus ( arbel->mailbox_in ) ); + in_buffer = &hcr.u.dwords[0]; + if ( in_len && ( command & ARBEL_HCR_IN_MBOX ) ) { + in_buffer = arbel->mailbox_in; + MLX_FILL_1 ( &hcr, 1, in_param_l, virt_to_bus ( in_buffer ) ); } + memcpy ( in_buffer, in, in_len ); MLX_FILL_1 ( &hcr, 2, input_modifier, in_mod ); - if ( op_fl & ARBEL_HCR_OUT_MAILBOX ) { + out_buffer = &hcr.u.dwords[3]; + if ( out_len && ( command & ARBEL_HCR_OUT_MBOX ) ) { + out_buffer = arbel->mailbox_out; MLX_FILL_1 ( &hcr, 4, out_param_l, - virt_to_bus ( arbel->mailbox_out ) ); + virt_to_bus ( out_buffer ) ); } MLX_FILL_3 ( &hcr, 6, - opcode, ( op_fl & ARBEL_HCR_OPCODE_MASK ), + opcode, opcode, opcode_modifier, op_mod, go, 1 ); @@ -343,7 +359,7 @@ static int arbel_command ( struct arbel *arbel, unsigned int op_fl, } /* Wait for command completion */ - if ( ( rc = arbel_command_wait ( arbel, &hcr ) ) != 0 ) { + if ( ( rc = arbel_cmd_wait ( arbel, &hcr ) ) != 0 ) { DBGC ( arbel, "Arbel %p timed out waiting for command:\n", arbel ); DBGC_HD ( arbel, &hcr, sizeof ( hcr ) ); @@ -362,15 +378,19 @@ static int arbel_command ( struct arbel *arbel, unsigned int op_fl, /* Read output parameters, if any */ hcr.u.dwords[3] = readl ( arbel->config + ARBEL_HCR_REG ( 3 ) ); hcr.u.dwords[4] = readl ( arbel->config + ARBEL_HCR_REG ( 4 ) ); - if ( op_fl & ARBEL_HCR_OUT_IMMEDIATE ) { - memcpy ( out_param, &hcr.u.dwords[3], 8 ); - } else if ( op_fl & ARBEL_HCR_OUT_MAILBOX ) { - memcpy ( out_param, arbel->mailbox_out, out_param_len ); - } + memcpy ( out, out_buffer, out_len ); return 0; } +static int arbel_cmd_query_dev_lim ( struct arbel *arbel, + struct arbelprm_query_dev_lim *out ) { + return arbel_cmd ( arbel, + ARBEL_HCR_OUT_CMD ( ARBEL_HCR_QUERY_DEV_LIM, + 1, sizeof ( *out ) ), + 0, NULL, 0, out ); +} + /** * Create completion queue * @@ -752,15 +772,14 @@ static int arbel_probe ( struct pci_device *pci, list_add ( &static_ipoib_qp.recv.list, &static_ipoib_recv_cq.work_queues ); - uint8_t buf[512]; - memset ( buf, 0xaa, sizeof ( buf ) ); - if ( ( rc = arbel_command ( &static_arbel, - ( 0x03 | ARBEL_HCR_OUT_MAILBOX ), 0, - NULL, 0, 0, buf, 256 ) ) != 0 ) { + struct arbelprm_query_dev_lim dev_lim; + memset ( &dev_lim, 0xaa, sizeof ( dev_lim ) ); + if ( ( rc = arbel_cmd_query_dev_lim ( &static_arbel, + &dev_lim ) ) != 0 ) { DBG ( "QUERY_DEV_LIM failed: %s\n", strerror ( rc ) ); } DBG ( "Device limits:\n "); - DBG_HD ( &buf[0], sizeof ( buf ) ); + DBG_HD ( &dev_lim, sizeof ( dev_lim ) ); /* Register network device */ if ( ( rc = register_netdev ( netdev ) ) != 0 ) From 156b409ccc2e1e9c08784b49471c60c4950d7603 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 16 Sep 2007 04:02:20 +0100 Subject: [PATCH 35/84] Rearrange data structures to maximise embedding (and hence minimise the number of separate allocations that need to be done). 
--- src/drivers/net/mlx_ipoib/arbel.h | 13 ++ src/drivers/net/mlx_ipoib/mt25218.c | 191 +++++++++++++++++----------- src/include/gpxe/infiniband.h | 10 +- 3 files changed, 132 insertions(+), 82 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index 1cf92bde..a41b6330 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -119,8 +119,20 @@ struct arbel_recv_work_queue { union arbel_recv_wqe *wqe; }; +/** An Arbel queue pair */ +struct arbel_queue_pair { + /** Infiniband queue pair */ + struct ib_queue_pair qp; + /** Send work queue */ + struct arbel_send_work_queue send; + /** Receive work queue */ + struct arbel_recv_work_queue recv; +}; + /** An Arbel completion queue */ struct arbel_completion_queue { + /** Infiniband completion queue */ + struct ib_completion_queue cq; /** Doorbell record number */ unsigned int doorbell_idx; /** Completion queue entries */ @@ -154,6 +166,7 @@ struct arbel { */ #define ARBEL_HCR_QUERY_DEV_LIM 0x0003 +#define ARBEL_HCR_SW2HW_CQ 0x0016 #define ARBEL_HCR_BASE 0x80680 #define ARBEL_HCR_REG(x) ( ARBEL_HCR_BASE + 4 * (x) ) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 180ec6da..4fcc6a3c 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -52,57 +52,54 @@ static struct io_buffer *static_ipoib_tx_ring[NUM_IPOIB_SND_WQES]; static struct io_buffer *static_ipoib_rx_ring[NUM_IPOIB_RCV_WQES]; static struct arbel static_arbel; -static struct arbel_send_work_queue static_arbel_ipoib_send_wq = { - .doorbell_idx = IPOIB_SND_QP_DB_IDX, -}; -static struct arbel_send_work_queue static_arbel_ipoib_recv_wq = { - .doorbell_idx = IPOIB_RCV_QP_DB_IDX, -}; -static struct arbel_completion_queue static_arbel_ipoib_send_cq = { - .doorbell_idx = IPOIB_SND_CQ_CI_DB_IDX, -}; -static struct arbel_completion_queue static_arbel_ipoib_recv_cq = { - .doorbell_idx = IPOIB_RCV_CQ_CI_DB_IDX, -}; +static struct arbel_completion_queue static_ipoib_send_cq; +static struct arbel_completion_queue static_ipoib_recv_cq; -static struct ib_completion_queue static_ipoib_send_cq; -static struct ib_completion_queue static_ipoib_recv_cq; -static struct ib_device static_ibdev = { - .dev_priv = &static_arbel, -}; -static struct ib_queue_pair static_ipoib_qp = { +static struct arbel_queue_pair static_ipoib_qp = { + .qp = { + .send = { + .qp = &static_ipoib_qp.qp, + .is_send = 1, + .cq = &static_ipoib_send_cq.cq, + .num_wqes = NUM_IPOIB_SND_WQES, + .iobufs = static_ipoib_tx_ring, + .list = LIST_HEAD_INIT (static_ipoib_qp.qp.send.list), + }, + .recv = { + .qp = &static_ipoib_qp.qp, + .is_send = 0, + .cq = &static_ipoib_recv_cq.cq, + .num_wqes = NUM_IPOIB_RCV_WQES, + .iobufs = static_ipoib_rx_ring, + .list = LIST_HEAD_INIT (static_ipoib_qp.qp.recv.list), + }, + }, .send = { - .qp = &static_ipoib_qp, - .is_send = 1, - .cq = &static_ipoib_send_cq, - .num_wqes = NUM_IPOIB_SND_WQES, - .iobufs = static_ipoib_tx_ring, - .dev_priv = &static_arbel_ipoib_send_wq, - .list = LIST_HEAD_INIT ( static_ipoib_qp.send.list ), + .doorbell_idx = IPOIB_SND_QP_DB_IDX, }, .recv = { - .qp = &static_ipoib_qp, - .is_send = 0, - .cq = &static_ipoib_recv_cq, - .num_wqes = NUM_IPOIB_RCV_WQES, - .iobufs = static_ipoib_rx_ring, - .dev_priv = &static_arbel_ipoib_recv_wq, - .list = LIST_HEAD_INIT ( static_ipoib_qp.recv.list ), + .doorbell_idx = IPOIB_RCV_QP_DB_IDX, }, }; -static struct ib_completion_queue static_ipoib_send_cq = { - .cqn = 1234, /* Only used for debug messages 
*/ - .num_cqes = NUM_IPOIB_SND_CQES, - .dev_priv = &static_arbel_ipoib_send_cq, - .work_queues = LIST_HEAD_INIT ( static_ipoib_send_cq.work_queues ), +static struct arbel_completion_queue static_ipoib_send_cq = { + .cq = { + .cqn = 1234, /* Only used for debug messages */ + .num_cqes = NUM_IPOIB_SND_CQES, + .work_queues = LIST_HEAD_INIT (static_ipoib_send_cq.cq.work_queues), + }, + .doorbell_idx = IPOIB_SND_CQ_CI_DB_IDX, }; -static struct ib_completion_queue static_ipoib_recv_cq = { - .cqn = 2345, /* Only used for debug messages */ - .num_cqes = NUM_IPOIB_RCV_CQES, - .dev_priv = &static_arbel_ipoib_recv_cq, - .work_queues = LIST_HEAD_INIT ( static_ipoib_recv_cq.work_queues ), +static struct arbel_completion_queue static_ipoib_recv_cq = { + .cq = { + .cqn = 2345, /* Only used for debug messages */ + .num_cqes = NUM_IPOIB_RCV_CQES, + .work_queues = LIST_HEAD_INIT (static_ipoib_recv_cq.cq.work_queues), + }, + .doorbell_idx = IPOIB_RCV_CQ_CI_DB_IDX, +}; +static struct ib_device static_ibdev = { + .priv = &static_arbel, }; - /** @@ -152,7 +149,7 @@ static int mlx_transmit_direct ( struct net_device *netdev, }; memcpy ( &av.gid, ( ( void * ) bav ) + 16, 16 ); - rc = arbel_post_send ( &static_ibdev, &static_ipoib_qp, &av, iobuf ); + rc = arbel_post_send ( &static_ibdev, &static_ipoib_qp.qp, &av, iobuf ); return rc; } @@ -206,7 +203,8 @@ static void mlx_refill_rx ( struct net_device *netdev ) { if ( ! iobuf ) break; DBG ( "Posting RX buffer %p:\n", iobuf ); - if ( ( rc = arbel_post_recv ( &static_ibdev, &static_ipoib_qp, + if ( ( rc = arbel_post_recv ( &static_ibdev, + &static_ipoib_qp.qp, iobuf ) ) != 0 ) { free_iob ( iobuf ); break; @@ -238,9 +236,9 @@ static void mlx_poll ( struct net_device *netdev ) { } /* Poll completion queues */ - arbel_poll_cq ( &static_ibdev, &static_ipoib_send_cq, + arbel_poll_cq ( &static_ibdev, &static_ipoib_send_cq.cq, temp_complete_send, temp_complete_recv ); - arbel_poll_cq ( &static_ibdev, &static_ipoib_recv_cq, + arbel_poll_cq ( &static_ibdev, &static_ipoib_recv_cq.cq, temp_complete_send, temp_complete_recv ); // mlx_poll_cq ( netdev, mlx->rcv_cqh, mlx_rx_complete ); @@ -383,27 +381,63 @@ static int arbel_cmd ( struct arbel *arbel, unsigned long command, return 0; } -static int arbel_cmd_query_dev_lim ( struct arbel *arbel, - struct arbelprm_query_dev_lim *out ) { +static inline int +arbel_cmd_query_dev_lim ( struct arbel *arbel, + struct arbelprm_query_dev_lim *dev_lim ) { return arbel_cmd ( arbel, ARBEL_HCR_OUT_CMD ( ARBEL_HCR_QUERY_DEV_LIM, - 1, sizeof ( *out ) ), - 0, NULL, 0, out ); + 1, sizeof ( *dev_lim ) ), + 0, NULL, 0, dev_lim ); } +static inline int +arbel_cmd_sw2hw_cq ( struct arbel *arbel, unsigned long cqn, + const struct arbelprm_completion_queue_context *cqctx ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_SW2HW_CQ, + 1, sizeof ( *cqctx ) ), + 0, cqctx, cqn, NULL ); +} + +/*************************************************************************** + * + * Completion queue operations + * + *************************************************************************** + */ + /** * Create completion queue * * @v ibdev Infiniband device * @v */ -static int arbel_create_cq ( struct ib_device *ibdev ) { - struct arbelprm_completion_queue_context *cqctx; +static int arbel_create_cq ( struct ib_device *ibdev, + struct ib_completion_queue **new_cq ) { + struct arbel *arbel = ibdev->priv; + struct arbelprm_completion_queue_context cqctx; + struct ib_completion_queue *cq; + cq = zalloc ( sizeof ( *cq ) ); + if ( ! 
cq ) + return -ENOMEM; + + + memset ( &cqctx, 0, sizeof ( cqctx ) ); + + + return arbel_cmd_sw2hw_cq ( arbel, 0, &cqctx ); } +/*************************************************************************** + * + * Work request operations + * + *************************************************************************** + */ + /** * Ring doorbell register in UAR * @@ -438,9 +472,11 @@ static int arbel_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp, struct ib_address_vector *av, struct io_buffer *iobuf ) { - struct arbel *arbel = ibdev->dev_priv; + struct arbel *arbel = ibdev->priv; + struct arbel_queue_pair *arbel_qp + = container_of ( qp, struct arbel_queue_pair, qp ); struct ib_work_queue *wq = &qp->send; - struct arbel_send_work_queue *arbel_send_wq = wq->dev_priv; + struct arbel_send_work_queue *arbel_send_wq = &arbel_qp->send; struct arbelprm_ud_send_wqe *prev_wqe; struct arbelprm_ud_send_wqe *wqe; union arbelprm_doorbell_record *db_rec; @@ -526,9 +562,11 @@ static int arbel_post_send ( struct ib_device *ibdev, static int arbel_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp, struct io_buffer *iobuf ) { - struct arbel *arbel = ibdev->dev_priv; + struct arbel *arbel = ibdev->priv; + struct arbel_queue_pair *arbel_qp + = container_of ( qp, struct arbel_queue_pair, qp ); struct ib_work_queue *wq = &qp->recv; - struct arbel_recv_work_queue *arbel_recv_wq = wq->dev_priv; + struct arbel_recv_work_queue *arbel_recv_wq = &arbel_qp->recv; struct arbelprm_recv_wqe *wqe; union arbelprm_doorbell_record *db_rec; unsigned int wqe_idx_mask; @@ -575,12 +613,14 @@ static int arbel_complete ( struct ib_device *ibdev, union arbelprm_completion_entry *cqe, ib_completer_t complete_send, ib_completer_t complete_recv ) { - struct arbel *arbel = ibdev->dev_priv; + struct arbel *arbel = ibdev->priv; struct ib_completion completion; struct ib_work_queue *wq; - struct io_buffer *iobuf; + struct ib_queue_pair *qp; + struct arbel_queue_pair *arbel_qp; struct arbel_send_work_queue *arbel_send_wq; struct arbel_recv_work_queue *arbel_recv_wq; + struct io_buffer *iobuf; ib_completer_t complete; unsigned int opcode; unsigned long qpn; @@ -614,14 +654,16 @@ static int arbel_complete ( struct ib_device *ibdev, arbel, cq->cqn, ( is_send ? "send" : "recv" ), qpn ); return -EIO; } + qp = wq->qp; + arbel_qp = container_of ( qp, struct arbel_queue_pair, qp ); /* Identify work queue entry index */ if ( is_send ) { - arbel_send_wq = wq->dev_priv; + arbel_send_wq = &arbel_qp->send; wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_send_wq->wqe ) ) / sizeof ( arbel_send_wq->wqe[0] ) ); } else { - arbel_recv_wq = wq->dev_priv; + arbel_recv_wq = &arbel_qp->recv; wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_recv_wq->wqe ) ) / sizeof ( arbel_recv_wq->wqe[0] ) ); } @@ -637,7 +679,7 @@ static int arbel_complete ( struct ib_device *ibdev, /* Pass off to caller's completion handler */ complete = ( is_send ? 
complete_send : complete_recv ); - complete ( ibdev, wq->qp, &completion, iobuf ); + complete ( ibdev, qp, &completion, iobuf ); return rc; } @@ -654,8 +696,9 @@ static void arbel_poll_cq ( struct ib_device *ibdev, struct ib_completion_queue *cq, ib_completer_t complete_send, ib_completer_t complete_recv ) { - struct arbel *arbel = ibdev->dev_priv; - struct arbel_completion_queue *arbel_cq = cq->dev_priv; + struct arbel *arbel = ibdev->priv; + struct arbel_completion_queue *arbel_cq + = container_of ( cq, struct arbel_completion_queue, cq ); union arbelprm_doorbell_record *db_rec; union arbelprm_completion_entry *cqe; unsigned int cqe_idx_mask; @@ -757,20 +800,20 @@ static int arbel_probe ( struct pci_device *pci, static_arbel.uar = memfree_pci_dev.uar; static_arbel.db_rec = dev_ib_data.uar_context_base; static_arbel.reserved_lkey = dev_ib_data.mkey; - static_arbel_ipoib_send_wq.wqe = + static_ipoib_qp.send.wqe = ( ( struct udqp_st * ) qph )->snd_wq; - static_arbel_ipoib_recv_wq.wqe = + static_ipoib_qp.recv.wqe = ( ( struct udqp_st * ) qph )->rcv_wq; - static_arbel_ipoib_send_cq.cqe = + static_ipoib_send_cq.cqe = ( ( struct cq_st * ) ib_data.ipoib_snd_cq )->cq_buf; - static_arbel_ipoib_recv_cq.cqe = + static_ipoib_recv_cq.cqe = ( ( struct cq_st * ) ib_data.ipoib_rcv_cq )->cq_buf; - static_ipoib_qp.qpn = ib_get_qpn ( qph ); - static_ipoib_qp.priv = netdev; - list_add ( &static_ipoib_qp.send.list, - &static_ipoib_send_cq.work_queues ); - list_add ( &static_ipoib_qp.recv.list, - &static_ipoib_recv_cq.work_queues ); + static_ipoib_qp.qp.qpn = ib_get_qpn ( qph ); + static_ipoib_qp.qp.priv = netdev; + list_add ( &static_ipoib_qp.qp.send.list, + &static_ipoib_send_cq.cq.work_queues ); + list_add ( &static_ipoib_qp.qp.recv.list, + &static_ipoib_recv_cq.cq.work_queues ); struct arbelprm_query_dev_lim dev_lim; memset ( &dev_lim, 0xaa, sizeof ( dev_lim ) ); diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 85684b63..dd8022fb 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -89,8 +89,6 @@ struct ib_work_queue { unsigned long next_idx; /** I/O buffers assigned to work queue */ struct io_buffer **iobufs; - /** Device private data */ - void *dev_priv; }; /** An Infiniband Queue Pair */ @@ -103,8 +101,6 @@ struct ib_queue_pair { struct ib_work_queue recv; /** Queue owner private data */ void *priv; - /** Device private data */ - void *dev_priv; }; /** An Infiniband Completion Queue */ @@ -123,8 +119,6 @@ struct ib_completion_queue { unsigned long next_idx; /** List of work queues completing to this queue */ struct list_head work_queues; - /** Device private data */ - void *dev_priv; }; /** An Infiniband completion */ @@ -224,8 +218,8 @@ struct ib_device_operations { /** An Infiniband device */ struct ib_device { - /** Device private data */ - void *dev_priv; + /** Driver private data */ + void *priv; }; From 5a43293c385d8a64a90a518b8759f4b427706fcf Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 16 Sep 2007 12:52:58 +0100 Subject: [PATCH 36/84] Started to add code for CQ creation --- src/drivers/net/mlx_ipoib/arbel.h | 85 ++++++++++++++++++ src/drivers/net/mlx_ipoib/mt25218.c | 132 +++++++++++++++++++++++++--- src/drivers/net/mlx_ipoib/mt25218.h | 16 ++-- 3 files changed, 212 insertions(+), 21 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index a41b6330..68c6282c 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -85,6 +85,12 @@ union 
arbelprm_doorbell_register { * */ +/** Arbel device limits */ +struct arbel_dev_limits { + /** Number of reserved CQs */ + unsigned long reserved_cqs; +}; + /** Alignment of Arbel send work queue entries */ #define ARBEL_SEND_WQE_ALIGN 128 @@ -129,6 +135,12 @@ struct arbel_queue_pair { struct arbel_recv_work_queue recv; }; +/** Maximum number of allocatable completion queues + * + * This is a policy decision, not a device limit. + */ +#define ARBEL_MAX_CQS 8 + /** An Arbel completion queue */ struct arbel_completion_queue { /** Infiniband completion queue */ @@ -139,6 +151,14 @@ struct arbel_completion_queue { union arbelprm_completion_entry *cqe; }; +/** An Arbel resource bitmask */ +typedef uint32_t arbel_bitmask_t; + +/** Size of an Arbel resource bitmask */ +#define ARBEL_BITMASK_SIZE(max_entries) \ + ( ( (max_entries) + ( 8 * sizeof ( arbel_bitmask_t ) ) - 1 ) / \ + ( 8 * sizeof ( arbel_bitmask_t ) ) ) + /** An Arbel device */ struct arbel { /** Configuration registers */ @@ -157,7 +177,12 @@ struct arbel { * Used to get unrestricted memory access. */ unsigned long reserved_lkey; + + /** Completion queue in-use bitmask */ + arbel_bitmask_t cq_inuse[ ARBEL_BITMASK_SIZE ( ARBEL_MAX_CQS ) ]; + /** Device limits */ + struct arbel_dev_limits limits; }; /* @@ -203,4 +228,64 @@ struct arbel { #define ARBEL_HCR_OUT_CMD( _opcode, _out_mbox, _out_len ) \ ARBEL_HCR_CMD ( _opcode, 0, 0, _out_mbox, _out_len ) +/* + * Doorbell record allocation + * + * The doorbell record map looks like: + * + * ARBEL_MAX_CQS * Arm completion queue doorbell + * ARBEL_MAX_QPS * Send work request doorbell + * Group separator + * ...(empty space)... + * ARBEL_MAX_QPS * Receive work request doorbell + * ARBEL_MAX_CQS * Completion queue consumer counter update doorbell + */ + +#define ARBEL_MAX_DOORBELL_RECORDS 512 +#define ARBEL_GROUP_SEPARATOR_DOORBELL ( ARBEL_MAX_CQS + ARBEL_MAX_QPS ) + +/** + * Get arm completion queue doorbell index + * + * @v cqn_offset Completion queue number offset + * @ret doorbell_idx Doorbell index + */ +static inline unsigned int +arbel_arm_cq_doorbell_idx ( unsigned int cqn_offset ) { + return cqn_offset; +} + +/** + * Get send work request doorbell index + * + * @v qpn_offset Queue pair number offset + * @ret doorbell_idx Doorbell index + */ +static inline unsigned int +arbel_send_doorbell_idx ( unsigned int qpn_offset ) { + return ( ARBEL_MAX_CQS + qpn_offset ); +} + +/** + * Get receive work request doorbell index + * + * @v qpn_offset Queue pair number offset + * @ret doorbell_idx Doorbell index + */ +static inline unsigned int +arbel_recv_doorbell_idx ( unsigned int qpn_offset ) { + return ( ARBEL_MAX_DOORBELL_RECORDS - ARBEL_MAX_CQS - qpn_offset - 1 ); +} + +/** + * Get commpletion queue consumer counter doorbell index + * + * @v cqn_offset Completion queue number offset + * @ret doorbell_idx Doorbell index + */ +static inline unsigned int +arbel_cq_ci_doorbell_idx ( unsigned int cqn_offset ) { + return ( ARBEL_MAX_DOORBELL_RECORDS - cqn_offset - 1 ); +} + #endif /* _ARBEL_H */ diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 4fcc6a3c..925b00f9 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -12,6 +12,7 @@ Skeleton NIC driver for Etherboot #include #include +#include #include #include #include @@ -266,6 +267,50 @@ static struct net_device_operations mlx_operations = { .irq = mlx_irq, }; + + + +/** + * Allocate queue number + * + * @v q_inuse Queue usage bitmask + * @v max_inuse Maximum number of 
in-use queues + * @ret qn_offset Free queue number offset, or negative error + */ +static int arbel_alloc_qn_offset ( arbel_bitmask_t *q_inuse, + unsigned int max_inuse ) { + unsigned int qn_offset = 0; + arbel_bitmask_t mask = 1; + + while ( qn_offset < max_inuse ) { + if ( ( mask & *q_inuse ) == 0 ) { + *q_inuse |= mask; + return qn_offset; + } + qn_offset++; + mask <<= 1; + if ( ! mask ) { + mask = 1; + q_inuse++; + } + } + return -ENFILE; +} + +/** + * Free queue number + * + * @v q_inuse Queue usage bitmask + * @v qn_offset Queue number offset + */ +static void arbel_free_qn_offset ( arbel_bitmask_t *q_inuse, int qn_offset ) { + arbel_bitmask_t mask; + + mask = ( 1 << ( qn_offset % ( 8 * sizeof ( mask ) ) ) ); + q_inuse += ( qn_offset / ( 8 * sizeof ( mask ) ) ); + *q_inuse &= ~mask; +} + /*************************************************************************** * * HCA commands @@ -412,22 +457,78 @@ arbel_cmd_sw2hw_cq ( struct arbel *arbel, unsigned long cqn, * @v ibdev Infiniband device * @v */ -static int arbel_create_cq ( struct ib_device *ibdev, +static int arbel_create_cq ( struct ib_device *ibdev, unsigned int num_cqes, struct ib_completion_queue **new_cq ) { struct arbel *arbel = ibdev->priv; + struct arbel_completion_queue *arbel_cq; struct arbelprm_completion_queue_context cqctx; - struct ib_completion_queue *cq; + int cqn_offset; + unsigned int cqn; + size_t cqe_size; + unsigned int i; + int rc; - cq = zalloc ( sizeof ( *cq ) ); - if ( ! cq ) - return -ENOMEM; + /* Find a free completion queue number */ + cqn_offset = arbel_alloc_qn_offset ( arbel->cq_inuse, ARBEL_MAX_CQS ); + if ( cqn_offset < 0 ) { + rc = cqn_offset; + goto err_cqn_offset; + } + cqn = ( arbel->limits.reserved_cqs + cqn_offset ); - + /* Allocate control structures */ + arbel_cq = zalloc ( sizeof ( *arbel_cq ) ); + if ( ! arbel_cq ) { + rc = -ENOMEM; + goto err_arbel_cq; + } + arbel_cq->cq.cqn = cqn; + arbel_cq->cq.num_cqes = num_cqes; + INIT_LIST_HEAD ( &arbel_cq->cq.work_queues ); + arbel_cq->doorbell_idx = arbel_cq_ci_doorbell_idx ( cqn_offset ); + /* Allocate completion queue itself */ + cqe_size = ( num_cqes * sizeof ( arbel_cq->cqe[0] ) ); + arbel_cq->cqe = malloc_dma ( cqe_size, sizeof ( arbel_cq->cqe[0] ) ); + if ( ! arbel_cq->cqe ) { + rc = -ENOMEM; + goto err_cqe; + } + memset ( arbel_cq->cqe, 0, cqe_size ); + for ( i = 0 ; i < num_cqes ; i++ ) { + MLX_FILL_1 ( &arbel_cq->cqe[i].normal, 7, owner, 1 ); + } + barrier(); + + /* Initialise doorbell records */ + // ... + + /* Hand queue over to hardware */ memset ( &cqctx, 0, sizeof ( cqctx ) ); - + MLX_FILL_1 ( &cqctx, 0, st, 0xa /* "Event fired" */ ); + MLX_FILL_1 ( &cqctx, 2, start_address_l, + virt_to_bus ( arbel_cq->cqe ) ); + /// .... - return arbel_cmd_sw2hw_cq ( arbel, 0, &cqctx ); + if ( ( rc = arbel_cmd_sw2hw_cq ( arbel, cqn, &cqctx ) ) != 0 ) { + // ... 
+ } + + + // completion queue number + // doorbell index + + *new_cq = &arbel_cq->cq; + + + return 0; + + err_cqe: + free ( arbel_cq ); + err_arbel_cq: + arbel_free_qn_offset ( arbel->cq_inuse, cqn_offset ); + err_cqn_offset: + return rc; } @@ -764,6 +865,8 @@ static void arbel_remove ( struct pci_device *pci ) { static int arbel_probe ( struct pci_device *pci, const struct pci_device_id *id __unused ) { struct net_device *netdev; + struct arbelprm_query_dev_lim dev_lim; + struct arbel *arbel = &static_arbel; struct mlx_nic *mlx; struct ib_mac *mac; udqp_t qph; @@ -815,12 +918,14 @@ static int arbel_probe ( struct pci_device *pci, list_add ( &static_ipoib_qp.qp.recv.list, &static_ipoib_recv_cq.cq.work_queues ); - struct arbelprm_query_dev_lim dev_lim; - memset ( &dev_lim, 0xaa, sizeof ( dev_lim ) ); - if ( ( rc = arbel_cmd_query_dev_lim ( &static_arbel, - &dev_lim ) ) != 0 ) { - DBG ( "QUERY_DEV_LIM failed: %s\n", strerror ( rc ) ); + /* Get device limits */ + if ( ( rc = arbel_cmd_query_dev_lim ( arbel, &dev_lim ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not get device limits: %s\n", + arbel, strerror ( rc ) ); + goto err_query_dev_lim; } + arbel->limits.reserved_cqs = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_cqs ) ); DBG ( "Device limits:\n "); DBG_HD ( &dev_lim, sizeof ( dev_lim ) ); @@ -830,6 +935,7 @@ static int arbel_probe ( struct pci_device *pci, return 0; + err_query_dev_lim: err_register_netdev: err_ipoib_init: ib_driver_close ( 0 ); diff --git a/src/drivers/net/mlx_ipoib/mt25218.h b/src/drivers/net/mlx_ipoib/mt25218.h index 590d72f6..85c60a0e 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.h +++ b/src/drivers/net/mlx_ipoib/mt25218.h @@ -146,10 +146,10 @@ /* uar context indexes */ enum { - MADS_RCV_CQ_ARM_DB_IDX, MADS_SND_CQ_ARM_DB_IDX, - IPOIB_RCV_CQ_ARM_DB_IDX, + MADS_RCV_CQ_ARM_DB_IDX, IPOIB_SND_CQ_ARM_DB_IDX, + IPOIB_RCV_CQ_ARM_DB_IDX, MADS_SND_QP_DB_IDX, IPOIB_SND_QP_DB_IDX, GROUP_SEP_IDX, @@ -158,12 +158,12 @@ enum { unmapped doorbell records -------------------------- */ END_UNMAPPED_DB_IDX = 505, - MADS_RCV_QP_DB_IDX = 506, - IPOIB_RCV_QP_DB_IDX = 507, - MADS_RCV_CQ_CI_DB_IDX = 508, - MADS_SND_CQ_CI_DB_IDX = 509, - IPOIB_RCV_CQ_CI_DB_IDX = 510, - IPOIB_SND_CQ_CI_DB_IDX = 511 + IPOIB_RCV_QP_DB_IDX = 506, + MADS_RCV_QP_DB_IDX = 507, + IPOIB_RCV_CQ_CI_DB_IDX = 508, + IPOIB_SND_CQ_CI_DB_IDX = 509, + MADS_RCV_CQ_CI_DB_IDX = 510, + MADS_SND_CQ_CI_DB_IDX = 511, }; /* uar resources types */ From 725a5740424f774e14d5cf82ecefe92e2bdafd31 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 16 Sep 2007 13:10:15 +0100 Subject: [PATCH 37/84] Match doorbell layout to expected usage. 
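
The fixed indices in mt25218.h are now expressed with the same arithmetic
as the arbel_*_doorbell_idx() helpers in arbel.h: arm CQ and send QP
doorbells count up from the bottom of the doorbell record area, while
receive QP and CQ consumer counter doorbells count down from the top.
A worked example, assuming 512 doorbell records, MAX_APP_CQS ==
MAX_APP_QPS == 8, IPOIB_QPN_SN == 1 and IPOIB_RCV_CQN_SN == 3 (the EX_*
names are illustrative only):

	enum example_ipoib_doorbells {
		EX_IPOIB_RCV_CQ_ARM_DB_IDX = 3,			/* cqn_offset        */
		EX_IPOIB_SND_QP_DB_IDX     = ( 8 + 1 ),		/* MAX_APP_CQS + qpn */
		EX_IPOIB_RCV_QP_DB_IDX     = ( 512 - 8 - 1 - 1 ),	/* = 502, from top */
		EX_IPOIB_RCV_CQ_CI_DB_IDX  = ( 512 - 3 - 1 ),		/* = 508, from top */
	};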
--- src/drivers/net/mlx_ipoib/ib_driver.h | 4 ++-- src/drivers/net/mlx_ipoib/mt25218.h | 30 +++++++++++++-------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/ib_driver.h b/src/drivers/net/mlx_ipoib/ib_driver.h index 305bb5d4..57c02820 100644 --- a/src/drivers/net/mlx_ipoib/ib_driver.h +++ b/src/drivers/net/mlx_ipoib/ib_driver.h @@ -51,7 +51,7 @@ enum { MADS_QPN_SN, IPOIB_QPN_SN, - MAX_APP_QPS + MAX_APP_QPS = 8 }; enum { @@ -59,7 +59,7 @@ enum { MADS_RCV_CQN_SN, IPOIB_SND_CQN_SN, IPOIB_RCV_CQN_SN, - MAX_APP_CQS + MAX_APP_CQS = 8 }; enum { diff --git a/src/drivers/net/mlx_ipoib/mt25218.h b/src/drivers/net/mlx_ipoib/mt25218.h index 85c60a0e..702b5caf 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.h +++ b/src/drivers/net/mlx_ipoib/mt25218.h @@ -146,24 +146,24 @@ /* uar context indexes */ enum { - MADS_SND_CQ_ARM_DB_IDX, - MADS_RCV_CQ_ARM_DB_IDX, - IPOIB_SND_CQ_ARM_DB_IDX, - IPOIB_RCV_CQ_ARM_DB_IDX, - MADS_SND_QP_DB_IDX, - IPOIB_SND_QP_DB_IDX, - GROUP_SEP_IDX, - START_UNMAPPED_DB_IDX, + MADS_SND_CQ_ARM_DB_IDX = MADS_SND_CQN_SN, + MADS_RCV_CQ_ARM_DB_IDX = MADS_RCV_CQN_SN, + IPOIB_SND_CQ_ARM_DB_IDX = IPOIB_SND_CQN_SN, + IPOIB_RCV_CQ_ARM_DB_IDX = IPOIB_RCV_CQN_SN, + MADS_SND_QP_DB_IDX = ( MAX_APP_CQS + MADS_QPN_SN ), + IPOIB_SND_QP_DB_IDX = ( MAX_APP_CQS + IPOIB_QPN_SN ), + GROUP_SEP_IDX = ( MAX_APP_CQS + MAX_APP_QPS ), + // START_UNMAPPED_DB_IDX, /* -------------------------- unmapped doorbell records -------------------------- */ - END_UNMAPPED_DB_IDX = 505, - IPOIB_RCV_QP_DB_IDX = 506, - MADS_RCV_QP_DB_IDX = 507, - IPOIB_RCV_CQ_CI_DB_IDX = 508, - IPOIB_SND_CQ_CI_DB_IDX = 509, - MADS_RCV_CQ_CI_DB_IDX = 510, - MADS_SND_CQ_CI_DB_IDX = 511, + // END_UNMAPPED_DB_IDX, + IPOIB_RCV_QP_DB_IDX = ( 512 - MAX_APP_CQS - IPOIB_QPN_SN - 1 ), + MADS_RCV_QP_DB_IDX = ( 512 - MAX_APP_CQS - MADS_QPN_SN - 1 ), + IPOIB_RCV_CQ_CI_DB_IDX = ( 512 - IPOIB_RCV_CQN_SN - 1 ), + IPOIB_SND_CQ_CI_DB_IDX = ( 512 - IPOIB_SND_CQN_SN - 1 ), + MADS_RCV_CQ_CI_DB_IDX = ( 512 - MADS_RCV_CQN_SN - 1 ), + MADS_SND_CQ_CI_DB_IDX = ( 512 - MADS_SND_CQN_SN - 1 ), }; /* uar resources types */ From 18edcf66341051ce284e2bd1ee62542c8991b057 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 16 Sep 2007 14:18:17 +0100 Subject: [PATCH 38/84] create_cq() implemented (but not tested). 
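
The new arbel_create_cq() unwinds on failure with the usual goto ladder:
each step that can fail jumps to a label that releases, in reverse order,
whatever was allocated before it.  A minimal, hypothetical sketch of that
shape (example_create() and example_hand_to_hw() are illustrative
stand-ins, not driver functions):

	#include <errno.h>
	#include <stdlib.h>

	static int example_hand_to_hw ( void *a, void *b ) {
		/* Stand-in for handing the resources over to hardware
		 * (e.g. SW2HW_CQ); always succeeds in this sketch.
		 */
		( void ) a;
		( void ) b;
		return 0;
	}

	static int example_create ( void ) {
		void *a;
		void *b;
		int rc;

		if ( ( a = malloc ( 16 ) ) == NULL ) {
			rc = -ENOMEM;
			goto err_a;
		}
		if ( ( b = malloc ( 16 ) ) == NULL ) {
			rc = -ENOMEM;
			goto err_b;
		}
		if ( ( rc = example_hand_to_hw ( a, b ) ) != 0 )
			goto err_hw;

		return 0;

	 err_hw:
		free ( b );
	 err_b:
		free ( a );
	 err_a:
		return rc;
	}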
--- src/drivers/net/mlx_ipoib/arbel.h | 27 ++++++++-- src/drivers/net/mlx_ipoib/mt25218.c | 81 ++++++++++++++++++++--------- 2 files changed, 78 insertions(+), 30 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index 68c6282c..37fe3693 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -12,10 +12,23 @@ * */ +/* UAR context table (UCE) resource types */ +#define ARBEL_UAR_RES_NONE 0x00 +#define ARBEL_UAR_RES_CQ_CI 0x01 +#define ARBEL_UAR_RES_CQ_ARM 0x02 +#define ARBEL_UAR_RES_SQ 0x03 +#define ARBEL_UAR_RES_RQ 0x04 +#define ARBEL_UAR_RES_GROUP_SEP 0x07 + +/* Work queue entry and completion queue entry opcodes */ #define ARBEL_OPCODE_SEND 0x0a #define ARBEL_OPCODE_RECV_ERROR 0xfe #define ARBEL_OPCODE_SEND_ERROR 0xff +/* HCA command register opcodes */ +#define ARBEL_HCR_QUERY_DEV_LIM 0x0003 +#define ARBEL_HCR_SW2HW_CQ 0x0016 + /* * Wrapper structures for hardware datatypes * @@ -24,6 +37,7 @@ struct MLX_DECLARE_STRUCT ( arbelprm_completion_queue_context ); struct MLX_DECLARE_STRUCT ( arbelprm_completion_queue_entry ); struct MLX_DECLARE_STRUCT ( arbelprm_completion_with_error ); +struct MLX_DECLARE_STRUCT ( arbelprm_cq_arm_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_hca_command_register ); struct MLX_DECLARE_STRUCT ( arbelprm_qp_db_record ); @@ -71,6 +85,7 @@ union arbelprm_completion_entry { } __attribute__ (( packed )); union arbelprm_doorbell_record { + struct arbelprm_cq_arm_db_record cq_arm; struct arbelprm_cq_ci_db_record cq_ci; struct arbelprm_qp_db_record qp; } __attribute__ (( packed )); @@ -87,6 +102,8 @@ union arbelprm_doorbell_register { /** Arbel device limits */ struct arbel_dev_limits { + /** Number of reserver UARs */ + unsigned long reserved_uars; /** Number of reserved CQs */ unsigned long reserved_cqs; }; @@ -177,6 +194,8 @@ struct arbel { * Used to get unrestricted memory access. 
*/ unsigned long reserved_lkey; + /** Event queue number */ + unsigned long eqn; /** Completion queue in-use bitmask */ arbel_bitmask_t cq_inuse[ ARBEL_BITMASK_SIZE ( ARBEL_MAX_CQS ) ]; @@ -185,14 +204,14 @@ struct arbel { struct arbel_dev_limits limits; }; +/** Global protection domain */ +#define ARBEL_GLOBAL_PD 0x123456 + /* * HCA commands * */ -#define ARBEL_HCR_QUERY_DEV_LIM 0x0003 -#define ARBEL_HCR_SW2HW_CQ 0x0016 - #define ARBEL_HCR_BASE 0x80680 #define ARBEL_HCR_REG(x) ( ARBEL_HCR_BASE + 4 * (x) ) #define ARBEL_HCR_MAX_WAIT_MS 2000 @@ -251,7 +270,7 @@ struct arbel { * @ret doorbell_idx Doorbell index */ static inline unsigned int -arbel_arm_cq_doorbell_idx ( unsigned int cqn_offset ) { +arbel_cq_arm_doorbell_idx ( unsigned int cqn_offset ) { return cqn_offset; } diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 925b00f9..dfdba4b5 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -455,26 +455,37 @@ arbel_cmd_sw2hw_cq ( struct arbel *arbel, unsigned long cqn, * Create completion queue * * @v ibdev Infiniband device - * @v + * @v log2_num_cqes Log2 of the number of completion queue entries + * @ret new_cq New completion queue + * @ret rc Return status code */ -static int arbel_create_cq ( struct ib_device *ibdev, unsigned int num_cqes, +static int arbel_create_cq ( struct ib_device *ibdev, + unsigned int log2_num_cqes, struct ib_completion_queue **new_cq ) { struct arbel *arbel = ibdev->priv; struct arbel_completion_queue *arbel_cq; struct arbelprm_completion_queue_context cqctx; + struct arbelprm_cq_ci_db_record *ci_db_rec; + struct arbelprm_cq_arm_db_record *arm_db_rec; int cqn_offset; unsigned int cqn; + unsigned int num_cqes; size_t cqe_size; + unsigned int ci_doorbell_idx; + unsigned int arm_doorbell_idx; unsigned int i; int rc; /* Find a free completion queue number */ cqn_offset = arbel_alloc_qn_offset ( arbel->cq_inuse, ARBEL_MAX_CQS ); if ( cqn_offset < 0 ) { + DBGC ( arbel, "Arbel %p out of completion queues\n", arbel ); rc = cqn_offset; goto err_cqn_offset; } cqn = ( arbel->limits.reserved_cqs + cqn_offset ); + ci_doorbell_idx = arbel_cq_ci_doorbell_idx ( cqn_offset ); + arm_doorbell_idx = arbel_cq_arm_doorbell_idx ( cqn_offset ); /* Allocate control structures */ arbel_cq = zalloc ( sizeof ( *arbel_cq ) ); @@ -485,9 +496,10 @@ static int arbel_create_cq ( struct ib_device *ibdev, unsigned int num_cqes, arbel_cq->cq.cqn = cqn; arbel_cq->cq.num_cqes = num_cqes; INIT_LIST_HEAD ( &arbel_cq->cq.work_queues ); - arbel_cq->doorbell_idx = arbel_cq_ci_doorbell_idx ( cqn_offset ); + arbel_cq->doorbell_idx = ci_doorbell_idx; /* Allocate completion queue itself */ + num_cqes = ( 1 << log2_num_cqes ); cqe_size = ( num_cqes * sizeof ( arbel_cq->cqe[0] ) ); arbel_cq->cqe = malloc_dma ( cqe_size, sizeof ( arbel_cq->cqe[0] ) ); if ( ! arbel_cq->cqe ) { @@ -501,28 +513,43 @@ static int arbel_create_cq ( struct ib_device *ibdev, unsigned int num_cqes, barrier(); /* Initialise doorbell records */ - // ... 
+ ci_db_rec = &arbel->db_rec[ci_doorbell_idx].cq_ci; + MLX_FILL_1 ( ci_db_rec, 0, counter, 0 ); + MLX_FILL_2 ( ci_db_rec, 1, + res, ARBEL_UAR_RES_CQ_CI, + cq_number, cqn ); + arm_db_rec = &arbel->db_rec[arm_doorbell_idx].cq_arm; + MLX_FILL_1 ( arm_db_rec, 0, counter, 0 ); + MLX_FILL_2 ( arm_db_rec, 1, + res, ARBEL_UAR_RES_CQ_ARM, + cq_number, cqn ); /* Hand queue over to hardware */ memset ( &cqctx, 0, sizeof ( cqctx ) ); MLX_FILL_1 ( &cqctx, 0, st, 0xa /* "Event fired" */ ); MLX_FILL_1 ( &cqctx, 2, start_address_l, virt_to_bus ( arbel_cq->cqe ) ); - /// .... - + MLX_FILL_2 ( &cqctx, 3, + usr_page, arbel->limits.reserved_uars, + log_cq_size, log2_num_cqes ); + MLX_FILL_1 ( &cqctx, 5, c_eqn, arbel->eqn ); + MLX_FILL_1 ( &cqctx, 6, pd, ARBEL_GLOBAL_PD ); + MLX_FILL_1 ( &cqctx, 7, l_key, arbel->reserved_lkey ); + MLX_FILL_1 ( &cqctx, 12, cqn, cqn ); + MLX_FILL_1 ( &cqctx, 13, cq_ci_db_record, ci_doorbell_idx ); + MLX_FILL_1 ( &cqctx, 14, cq_state_db_record, arm_doorbell_idx ); if ( ( rc = arbel_cmd_sw2hw_cq ( arbel, cqn, &cqctx ) ) != 0 ) { - // ... + DBGC ( arbel, "Arbel %p SW2HW_CQ failed: %s\n", + arbel, strerror ( rc ) ); + goto err_sw2hw; } - - // completion queue number - // doorbell index - *new_cq = &arbel_cq->cq; - - return 0; + err_sw2hw: + memset ( ci_db_rec, 0, sizeof ( *ci_db_rec ) ); + memset ( arm_db_rec, 0, sizeof ( *arm_db_rec ) ); err_cqe: free ( arbel_cq ); err_arbel_cq: @@ -580,7 +607,7 @@ static int arbel_post_send ( struct ib_device *ibdev, struct arbel_send_work_queue *arbel_send_wq = &arbel_qp->send; struct arbelprm_ud_send_wqe *prev_wqe; struct arbelprm_ud_send_wqe *wqe; - union arbelprm_doorbell_record *db_rec; + struct arbelprm_qp_db_record *qp_db_rec; union arbelprm_doorbell_register db_reg; const struct ib_gid *gid; unsigned int wqe_idx_mask; @@ -602,7 +629,7 @@ static int arbel_post_send ( struct ib_device *ibdev, MLX_FILL_1 ( &wqe->ctrl, 0, always1, 1 ); memset ( &wqe->ud, 0, sizeof ( wqe->ud ) ); MLX_FILL_2 ( &wqe->ud, 0, - ud_address_vector.pd, GLOBAL_PD, + ud_address_vector.pd, ARBEL_GLOBAL_PD, ud_address_vector.port_number, PXE_IB_PORT ); MLX_FILL_2 ( &wqe->ud, 1, ud_address_vector.rlid, av->dlid, @@ -631,8 +658,8 @@ static int arbel_post_send ( struct ib_device *ibdev, /* Update doorbell record */ barrier(); - db_rec = &arbel->db_rec[arbel_send_wq->doorbell_idx]; - MLX_FILL_1 ( &db_rec->qp, 0, + qp_db_rec = &arbel->db_rec[arbel_send_wq->doorbell_idx].qp; + MLX_FILL_1 ( qp_db_rec, 0, counter, ( ( wq->next_idx + 1 ) & 0xffff ) ); /* Ring doorbell register */ @@ -800,7 +827,7 @@ static void arbel_poll_cq ( struct ib_device *ibdev, struct arbel *arbel = ibdev->priv; struct arbel_completion_queue *arbel_cq = container_of ( cq, struct arbel_completion_queue, cq ); - union arbelprm_doorbell_record *db_rec; + struct arbelprm_cq_ci_db_record *ci_db_rec; union arbelprm_completion_entry *cqe; unsigned int cqe_idx_mask; int rc; @@ -828,8 +855,8 @@ static void arbel_poll_cq ( struct ib_device *ibdev, /* Update completion queue's index */ cq->next_idx++; /* Update doorbell record */ - db_rec = &arbel->db_rec[arbel_cq->doorbell_idx]; - MLX_FILL_1 ( &db_rec->cq_ci, 0, + ci_db_rec = &arbel->db_rec[arbel_cq->doorbell_idx].cq_ci; + MLX_FILL_1 ( ci_db_rec, 0, counter, ( cq->next_idx & 0xffffffffUL ) ); } } @@ -897,12 +924,13 @@ static int arbel_probe ( struct pci_device *pci, memcpy ( &mac->gid, ib_data.port_gid.raw, sizeof ( mac->gid ) ); /* Hack up IB structures */ - static_arbel.config = memfree_pci_dev.cr_space; - static_arbel.mailbox_in = dev_buffers_p->inprm_buf; - 
static_arbel.mailbox_out = dev_buffers_p->outprm_buf; - static_arbel.uar = memfree_pci_dev.uar; - static_arbel.db_rec = dev_ib_data.uar_context_base; - static_arbel.reserved_lkey = dev_ib_data.mkey; + arbel->config = memfree_pci_dev.cr_space; + arbel->mailbox_in = dev_buffers_p->inprm_buf; + arbel->mailbox_out = dev_buffers_p->outprm_buf; + arbel->uar = memfree_pci_dev.uar; + arbel->db_rec = dev_ib_data.uar_context_base; + arbel->reserved_lkey = dev_ib_data.mkey; + arbel->eqn = dev_ib_data.eq.eqn; static_ipoib_qp.send.wqe = ( ( struct udqp_st * ) qph )->snd_wq; static_ipoib_qp.recv.wqe = @@ -924,6 +952,7 @@ static int arbel_probe ( struct pci_device *pci, arbel, strerror ( rc ) ); goto err_query_dev_lim; } + arbel->limits.reserved_uars = MLX_GET ( &dev_lim, num_rsvd_uars ); arbel->limits.reserved_cqs = ( 1 << MLX_GET ( &dev_lim, log2_rsvd_cqs ) ); DBG ( "Device limits:\n "); From e238bb1e439cb614f2986a70546559efef16239f Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 16 Sep 2007 14:39:51 +0100 Subject: [PATCH 39/84] destroy_cq() now implemented (not tested). --- src/drivers/net/mlx_ipoib/arbel.h | 4 ++ src/drivers/net/mlx_ipoib/mt25218.c | 65 ++++++++++++++++++++++++++++- src/include/gpxe/infiniband.h | 19 +++++++++ 3 files changed, 86 insertions(+), 2 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index 37fe3693..d3842467 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -28,6 +28,7 @@ /* HCA command register opcodes */ #define ARBEL_HCR_QUERY_DEV_LIM 0x0003 #define ARBEL_HCR_SW2HW_CQ 0x0016 +#define ARBEL_HCR_HW2SW_CQ 0x0017 /* * Wrapper structures for hardware datatypes @@ -247,6 +248,9 @@ struct arbel { #define ARBEL_HCR_OUT_CMD( _opcode, _out_mbox, _out_len ) \ ARBEL_HCR_CMD ( _opcode, 0, 0, _out_mbox, _out_len ) +#define ARBEL_HCR_VOID_CMD( _opcode ) \ + ARBEL_HCR_CMD ( _opcode, 0, 0, 0, 0 ) + /* * Doorbell record allocation * diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index dfdba4b5..64ae992b 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -270,6 +270,13 @@ static struct net_device_operations mlx_operations = { +/*************************************************************************** + * + * Queue number allocation + * + *************************************************************************** + */ + /** * Allocate queue number * @@ -444,6 +451,13 @@ arbel_cmd_sw2hw_cq ( struct arbel *arbel, unsigned long cqn, 0, cqctx, cqn, NULL ); } +static inline int +arbel_cmd_hw2sw_cq ( struct arbel *arbel, unsigned long cqn ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_HW2SW_CQ ), + 1, NULL, cqn, NULL ); +} + /*************************************************************************** * * Completion queue operations @@ -548,8 +562,9 @@ static int arbel_create_cq ( struct ib_device *ibdev, return 0; err_sw2hw: - memset ( ci_db_rec, 0, sizeof ( *ci_db_rec ) ); - memset ( arm_db_rec, 0, sizeof ( *arm_db_rec ) ); + MLX_FILL_1 ( ci_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + MLX_FILL_1 ( arm_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + free_dma ( arbel_cq->cqe, cqe_size ); err_cqe: free ( arbel_cq ); err_arbel_cq: @@ -558,6 +573,50 @@ static int arbel_create_cq ( struct ib_device *ibdev, return rc; } +/** + * Destroy completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + */ +static void arbel_destroy_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq ) { + 
struct arbel *arbel = ibdev->priv; + struct arbel_completion_queue *arbel_cq = + container_of ( cq, struct arbel_completion_queue, cq ); + struct arbelprm_cq_ci_db_record *ci_db_rec; + struct arbelprm_cq_arm_db_record *arm_db_rec; + int cqn_offset; + size_t cqe_size; + unsigned int ci_doorbell_idx; + unsigned int arm_doorbell_idx; + int rc; + + assert ( list_empty ( &cq->work_queues ) ); + + /* Take ownership back from hardware */ + if ( ( rc = arbel_cmd_hw2sw_cq ( arbel, cq->cqn ) ) != 0 ) { + DBGC ( arbel, "Arbel %p FATAL HW2SW_CQ failed: %s\n", + arbel, strerror ( rc ) ); + /* Leak memory and return; at least we avoid corruption */ + return; + } + + /* Clear doorbell records */ + cqn_offset = ( cq->cqn - arbel->limits.reserved_cqs ); + ci_doorbell_idx = arbel_cq_ci_doorbell_idx ( cqn_offset ); + arm_doorbell_idx = arbel_cq_arm_doorbell_idx ( cqn_offset ); + ci_db_rec = &arbel->db_rec[ci_doorbell_idx].cq_ci; + arm_db_rec = &arbel->db_rec[arm_doorbell_idx].cq_arm; + MLX_FILL_1 ( ci_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + MLX_FILL_1 ( arm_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + + /* Free memory */ + cqe_size = ( cq->num_cqes * sizeof ( arbel_cq->cqe[0] ) ); + free_dma ( arbel_cq->cqe, cqe_size ); + free ( arbel_cq ); + arbel_free_qn_offset ( arbel->cq_inuse, cqn_offset ); +} /*************************************************************************** * @@ -863,6 +922,8 @@ static void arbel_poll_cq ( struct ib_device *ibdev, /** Arbel Infiniband operations */ static struct ib_device_operations arbel_ib_operations = { + .create_cq = arbel_create_cq, + .destroy_cq = arbel_destroy_cq, .post_send = arbel_post_send, .post_recv = arbel_post_recv, .poll_cq = arbel_poll_cq, diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index dd8022fb..973c5823 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -168,6 +168,25 @@ struct ib_address_vector { * These represent a subset of the Infiniband Verbs. 
*/ struct ib_device_operations { + /** + * Create completion queue + * + * @v ibdev Infiniband device + * @v log2_num_cqes Log2 of the number of completion queue entries + * @ret new_cq New completion queue + * @ret rc Return status code + */ + int ( * create_cq ) ( struct ib_device *ibdev, + unsigned int log2_num_cqes, + struct ib_completion_queue **new_cq ); + /** + * Destroy completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + */ + void ( * destroy_cq ) ( struct ib_device *ibdev, + struct ib_completion_queue *cq ); /** Post send work queue entry * * @v ibdev Infiniband device From b21d4ca21e65025410df73b34d685b6e78c86f0d Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 16 Sep 2007 17:25:15 +0100 Subject: [PATCH 40/84] Revert to dev_priv/owner_priv scheme, rather than container_of; it makes it easier to put the generic allocation code into infiniband.c --- src/drivers/net/mlx_ipoib/arbel.h | 23 ++- src/drivers/net/mlx_ipoib/mt25218.c | 234 +++++++++++++++++----------- src/include/gpxe/infiniband.h | 20 ++- src/net/infiniband.c | 51 ++++++ 4 files changed, 221 insertions(+), 107 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index d3842467..a1ca21f9 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -107,6 +107,8 @@ struct arbel_dev_limits { unsigned long reserved_uars; /** Number of reserved CQs */ unsigned long reserved_cqs; + /** Number of reserved QPs */ + unsigned long reserved_qps; }; /** Alignment of Arbel send work queue entries */ @@ -143,6 +145,15 @@ struct arbel_recv_work_queue { union arbel_recv_wqe *wqe; }; +/** Maximum number of allocatable queue pairs + * + * This is a policy decision, not a device limit. + */ +#define ARBEL_MAX_QPS 8 + +/** Base queue pair number */ +#define ARBEL_QPN_BASE 0x550000 + /** An Arbel queue pair */ struct arbel_queue_pair { /** Infiniband queue pair */ @@ -161,10 +172,10 @@ struct arbel_queue_pair { /** An Arbel completion queue */ struct arbel_completion_queue { - /** Infiniband completion queue */ - struct ib_completion_queue cq; - /** Doorbell record number */ - unsigned int doorbell_idx; + /** Consumer counter doorbell record number */ + unsigned int ci_doorbell_idx; + /** Arm queue doorbell record number */ + unsigned int arm_doorbell_idx; /** Completion queue entries */ union arbelprm_completion_entry *cqe; }; @@ -200,6 +211,8 @@ struct arbel { /** Completion queue in-use bitmask */ arbel_bitmask_t cq_inuse[ ARBEL_BITMASK_SIZE ( ARBEL_MAX_CQS ) ]; + /** Queue pair in-use bitmask */ + arbel_bitmask_t qp_inuse[ ARBEL_BITMASK_SIZE ( ARBEL_MAX_QPS ) ]; /** Device limits */ struct arbel_dev_limits limits; @@ -301,7 +314,7 @@ arbel_recv_doorbell_idx ( unsigned int qpn_offset ) { } /** - * Get commpletion queue consumer counter doorbell index + * Get completion queue consumer counter doorbell index * * @v cqn_offset Completion queue number offset * @ret doorbell_idx Doorbell index diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 64ae992b..c466adb6 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -53,28 +53,28 @@ static struct io_buffer *static_ipoib_tx_ring[NUM_IPOIB_SND_WQES]; static struct io_buffer *static_ipoib_rx_ring[NUM_IPOIB_RCV_WQES]; static struct arbel static_arbel; -static struct arbel_completion_queue static_ipoib_send_cq; -static struct arbel_completion_queue static_ipoib_recv_cq; -static struct arbel_queue_pair 
static_ipoib_qp = { - .qp = { - .send = { - .qp = &static_ipoib_qp.qp, - .is_send = 1, - .cq = &static_ipoib_send_cq.cq, - .num_wqes = NUM_IPOIB_SND_WQES, - .iobufs = static_ipoib_tx_ring, - .list = LIST_HEAD_INIT (static_ipoib_qp.qp.send.list), - }, - .recv = { - .qp = &static_ipoib_qp.qp, - .is_send = 0, - .cq = &static_ipoib_recv_cq.cq, - .num_wqes = NUM_IPOIB_RCV_WQES, - .iobufs = static_ipoib_rx_ring, - .list = LIST_HEAD_INIT (static_ipoib_qp.qp.recv.list), - }, - }, +static struct arbel_completion_queue static_arbel_ipoib_send_cq = { + .ci_doorbell_idx = IPOIB_SND_CQ_CI_DB_IDX, +}; +static struct ib_completion_queue static_ipoib_send_cq = { + .cqn = 1234, /* Only used for debug messages */ + .num_cqes = NUM_IPOIB_SND_CQES, + .work_queues = LIST_HEAD_INIT ( static_ipoib_send_cq.work_queues ), + .dev_priv = &static_arbel_ipoib_send_cq, +}; + +static struct arbel_completion_queue static_arbel_ipoib_recv_cq = { + .ci_doorbell_idx = IPOIB_RCV_CQ_CI_DB_IDX, +}; +static struct ib_completion_queue static_ipoib_recv_cq = { + .cqn = 2345, /* Only used for debug messages */ + .num_cqes = NUM_IPOIB_RCV_CQES, + .work_queues = LIST_HEAD_INIT ( static_ipoib_recv_cq.work_queues ), + .dev_priv = &static_arbel_ipoib_recv_cq, +}; + +static struct arbel_queue_pair static_arbel_ipoib_qp = { .send = { .doorbell_idx = IPOIB_SND_QP_DB_IDX, }, @@ -82,24 +82,31 @@ static struct arbel_queue_pair static_ipoib_qp = { .doorbell_idx = IPOIB_RCV_QP_DB_IDX, }, }; -static struct arbel_completion_queue static_ipoib_send_cq = { - .cq = { - .cqn = 1234, /* Only used for debug messages */ - .num_cqes = NUM_IPOIB_SND_CQES, - .work_queues = LIST_HEAD_INIT (static_ipoib_send_cq.cq.work_queues), +static struct ib_queue_pair static_ipoib_qp = { + .send = { + .qp = &static_ipoib_qp, + .is_send = 1, + .cq = &static_ipoib_send_cq, + .num_wqes = NUM_IPOIB_SND_WQES, + .iobufs = static_ipoib_tx_ring, + .list = LIST_HEAD_INIT (static_ipoib_qp.send.list), + .dev_priv = &static_arbel_ipoib_qp.send, }, - .doorbell_idx = IPOIB_SND_CQ_CI_DB_IDX, -}; -static struct arbel_completion_queue static_ipoib_recv_cq = { - .cq = { - .cqn = 2345, /* Only used for debug messages */ - .num_cqes = NUM_IPOIB_RCV_CQES, - .work_queues = LIST_HEAD_INIT (static_ipoib_recv_cq.cq.work_queues), + .recv = { + .qp = &static_ipoib_qp, + .is_send = 0, + .cq = &static_ipoib_recv_cq, + .num_wqes = NUM_IPOIB_RCV_WQES, + .iobufs = static_ipoib_rx_ring, + .list = LIST_HEAD_INIT (static_ipoib_qp.recv.list), + .dev_priv = &static_arbel_ipoib_qp.recv, }, - .doorbell_idx = IPOIB_RCV_CQ_CI_DB_IDX, + .dev_priv = &static_arbel_ipoib_qp, }; + + static struct ib_device static_ibdev = { - .priv = &static_arbel, + .dev_priv = &static_arbel, }; @@ -150,7 +157,7 @@ static int mlx_transmit_direct ( struct net_device *netdev, }; memcpy ( &av.gid, ( ( void * ) bav ) + 16, 16 ); - rc = arbel_post_send ( &static_ibdev, &static_ipoib_qp.qp, &av, iobuf ); + rc = arbel_post_send ( &static_ibdev, &static_ipoib_qp, &av, iobuf ); return rc; } @@ -164,7 +171,7 @@ static void temp_complete_send ( struct ib_device *ibdev __unused, struct ib_queue_pair *qp, struct ib_completion *completion, struct io_buffer *iobuf ) { - struct net_device *netdev = qp->priv; + struct net_device *netdev = qp->owner_priv; DBG ( "Wahey! 
TX completion\n" ); netdev_tx_complete_err ( netdev, iobuf, @@ -175,7 +182,7 @@ static void temp_complete_recv ( struct ib_device *ibdev __unused, struct ib_queue_pair *qp, struct ib_completion *completion, struct io_buffer *iobuf ) { - struct net_device *netdev = qp->priv; + struct net_device *netdev = qp->owner_priv; struct mlx_nic *mlx = netdev->priv; DBG ( "Yay! RX completion on %p len %zx:\n", iobuf, completion->len ); @@ -205,7 +212,7 @@ static void mlx_refill_rx ( struct net_device *netdev ) { break; DBG ( "Posting RX buffer %p:\n", iobuf ); if ( ( rc = arbel_post_recv ( &static_ibdev, - &static_ipoib_qp.qp, + &static_ipoib_qp, iobuf ) ) != 0 ) { free_iob ( iobuf ); break; @@ -237,11 +244,10 @@ static void mlx_poll ( struct net_device *netdev ) { } /* Poll completion queues */ - arbel_poll_cq ( &static_ibdev, &static_ipoib_send_cq.cq, + arbel_poll_cq ( &static_ibdev, &static_ipoib_send_cq, temp_complete_send, temp_complete_recv ); - arbel_poll_cq ( &static_ibdev, &static_ipoib_recv_cq.cq, + arbel_poll_cq ( &static_ibdev, &static_ipoib_recv_cq, temp_complete_send, temp_complete_recv ); - // mlx_poll_cq ( netdev, mlx->rcv_cqh, mlx_rx_complete ); mlx_refill_rx ( netdev ); } @@ -469,24 +475,18 @@ arbel_cmd_hw2sw_cq ( struct arbel *arbel, unsigned long cqn ) { * Create completion queue * * @v ibdev Infiniband device - * @v log2_num_cqes Log2 of the number of completion queue entries - * @ret new_cq New completion queue + * @v cq Completion queue * @ret rc Return status code */ static int arbel_create_cq ( struct ib_device *ibdev, - unsigned int log2_num_cqes, - struct ib_completion_queue **new_cq ) { - struct arbel *arbel = ibdev->priv; + struct ib_completion_queue *cq ) { + struct arbel *arbel = ibdev->dev_priv; struct arbel_completion_queue *arbel_cq; struct arbelprm_completion_queue_context cqctx; struct arbelprm_cq_ci_db_record *ci_db_rec; struct arbelprm_cq_arm_db_record *arm_db_rec; int cqn_offset; - unsigned int cqn; - unsigned int num_cqes; size_t cqe_size; - unsigned int ci_doorbell_idx; - unsigned int arm_doorbell_idx; unsigned int i; int rc; @@ -497,9 +497,7 @@ static int arbel_create_cq ( struct ib_device *ibdev, rc = cqn_offset; goto err_cqn_offset; } - cqn = ( arbel->limits.reserved_cqs + cqn_offset ); - ci_doorbell_idx = arbel_cq_ci_doorbell_idx ( cqn_offset ); - arm_doorbell_idx = arbel_cq_arm_doorbell_idx ( cqn_offset ); + cq->cqn = ( arbel->limits.reserved_cqs + cqn_offset ); /* Allocate control structures */ arbel_cq = zalloc ( sizeof ( *arbel_cq ) ); @@ -507,58 +505,59 @@ static int arbel_create_cq ( struct ib_device *ibdev, rc = -ENOMEM; goto err_arbel_cq; } - arbel_cq->cq.cqn = cqn; - arbel_cq->cq.num_cqes = num_cqes; - INIT_LIST_HEAD ( &arbel_cq->cq.work_queues ); - arbel_cq->doorbell_idx = ci_doorbell_idx; + arbel_cq->ci_doorbell_idx = arbel_cq_ci_doorbell_idx ( cqn_offset ); + arbel_cq->arm_doorbell_idx = arbel_cq_arm_doorbell_idx ( cqn_offset ); /* Allocate completion queue itself */ - num_cqes = ( 1 << log2_num_cqes ); - cqe_size = ( num_cqes * sizeof ( arbel_cq->cqe[0] ) ); + cqe_size = ( cq->num_cqes * sizeof ( arbel_cq->cqe[0] ) ); arbel_cq->cqe = malloc_dma ( cqe_size, sizeof ( arbel_cq->cqe[0] ) ); if ( ! 
arbel_cq->cqe ) { rc = -ENOMEM; goto err_cqe; } memset ( arbel_cq->cqe, 0, cqe_size ); - for ( i = 0 ; i < num_cqes ; i++ ) { + for ( i = 0 ; i < cq->num_cqes ; i++ ) { MLX_FILL_1 ( &arbel_cq->cqe[i].normal, 7, owner, 1 ); } barrier(); /* Initialise doorbell records */ - ci_db_rec = &arbel->db_rec[ci_doorbell_idx].cq_ci; + ci_db_rec = &arbel->db_rec[arbel_cq->ci_doorbell_idx].cq_ci; MLX_FILL_1 ( ci_db_rec, 0, counter, 0 ); MLX_FILL_2 ( ci_db_rec, 1, res, ARBEL_UAR_RES_CQ_CI, - cq_number, cqn ); - arm_db_rec = &arbel->db_rec[arm_doorbell_idx].cq_arm; + cq_number, cq->cqn ); + arm_db_rec = &arbel->db_rec[arbel_cq->arm_doorbell_idx].cq_arm; MLX_FILL_1 ( arm_db_rec, 0, counter, 0 ); MLX_FILL_2 ( arm_db_rec, 1, res, ARBEL_UAR_RES_CQ_ARM, - cq_number, cqn ); + cq_number, cq->cqn ); /* Hand queue over to hardware */ memset ( &cqctx, 0, sizeof ( cqctx ) ); MLX_FILL_1 ( &cqctx, 0, st, 0xa /* "Event fired" */ ); MLX_FILL_1 ( &cqctx, 2, start_address_l, virt_to_bus ( arbel_cq->cqe ) ); +#if 0 MLX_FILL_2 ( &cqctx, 3, usr_page, arbel->limits.reserved_uars, log_cq_size, log2_num_cqes ); +#endif MLX_FILL_1 ( &cqctx, 5, c_eqn, arbel->eqn ); MLX_FILL_1 ( &cqctx, 6, pd, ARBEL_GLOBAL_PD ); MLX_FILL_1 ( &cqctx, 7, l_key, arbel->reserved_lkey ); - MLX_FILL_1 ( &cqctx, 12, cqn, cqn ); - MLX_FILL_1 ( &cqctx, 13, cq_ci_db_record, ci_doorbell_idx ); - MLX_FILL_1 ( &cqctx, 14, cq_state_db_record, arm_doorbell_idx ); - if ( ( rc = arbel_cmd_sw2hw_cq ( arbel, cqn, &cqctx ) ) != 0 ) { + MLX_FILL_1 ( &cqctx, 12, cqn, cq->cqn ); + MLX_FILL_1 ( &cqctx, 13, + cq_ci_db_record, arbel_cq->ci_doorbell_idx ); + MLX_FILL_1 ( &cqctx, 14, + cq_state_db_record, arbel_cq->arm_doorbell_idx ); + if ( ( rc = arbel_cmd_sw2hw_cq ( arbel, cq->cqn, &cqctx ) ) != 0 ) { DBGC ( arbel, "Arbel %p SW2HW_CQ failed: %s\n", arbel, strerror ( rc ) ); goto err_sw2hw; } - *new_cq = &arbel_cq->cq; + cq->dev_priv = arbel_cq; return 0; err_sw2hw: @@ -581,9 +580,8 @@ static int arbel_create_cq ( struct ib_device *ibdev, */ static void arbel_destroy_cq ( struct ib_device *ibdev, struct ib_completion_queue *cq ) { - struct arbel *arbel = ibdev->priv; - struct arbel_completion_queue *arbel_cq = - container_of ( cq, struct arbel_completion_queue, cq ); + struct arbel *arbel = ibdev->dev_priv; + struct arbel_completion_queue *arbel_cq = cq->dev_priv; struct arbelprm_cq_ci_db_record *ci_db_rec; struct arbelprm_cq_arm_db_record *arm_db_rec; int cqn_offset; @@ -618,6 +616,53 @@ static void arbel_destroy_cq ( struct ib_device *ibdev, arbel_free_qn_offset ( arbel->cq_inuse, cqn_offset ); } +/*************************************************************************** + * + * Queue pair operations + * + *************************************************************************** + */ + +static int arbel_create_qp ( struct ib_device *ibdev, + unsigned int log2_num_send_wqes, + struct ib_completion_queue *send_cq, + unsigned int log2_num_recv_wqes, + struct ib_completion_queue *recv_cq, + struct ib_queue_pair **new_qp ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbel_queue_pair *arbel_qp; + struct arbelprm_qp_db_record *send_db_rec; + struct arbelprm_qp_db_record *recv_db_rec; + int qpn_offset; + unsigned int qpn; + unsigned int num_send_wqes; + unsigned int num_recv_wqes; + unsigned int send_doorbell_idx; + unsigned int recv_doorbell_idx; + int rc; + + /* Find a free queue pair number */ + qpn_offset = arbel_alloc_qn_offset ( arbel->qp_inuse, ARBEL_MAX_QPS ); + if ( qpn_offset < 0 ) { + DBGC ( arbel, "Arbel %p out of queue pairs\n", arbel ); + rc = 
qpn_offset; + goto err_qpn_offset; + } + qpn = ( ARBEL_QPN_BASE + arbel->limits.reserved_qps + qpn_offset ); + send_doorbell_idx = arbel_send_doorbell_idx ( qpn_offset ); + recv_doorbell_idx = arbel_recv_doorbell_idx ( qpn_offset ); + + /* Allocate control structures */ + num_send_wqes = ( 1 << log2_num_send_wqes ); + num_recv_wqes = ( 1 << log2_num_recv_wqes ); + arbel_qp = zalloc ( sizeof ( *arbel_qp ) ); + + return 0; + + err_qpn_offset: + return rc; +} + /*************************************************************************** * * Work request operations @@ -659,9 +704,8 @@ static int arbel_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp, struct ib_address_vector *av, struct io_buffer *iobuf ) { - struct arbel *arbel = ibdev->priv; - struct arbel_queue_pair *arbel_qp - = container_of ( qp, struct arbel_queue_pair, qp ); + struct arbel *arbel = ibdev->dev_priv; + struct arbel_queue_pair *arbel_qp = qp->dev_priv; struct ib_work_queue *wq = &qp->send; struct arbel_send_work_queue *arbel_send_wq = &arbel_qp->send; struct arbelprm_ud_send_wqe *prev_wqe; @@ -749,9 +793,8 @@ static int arbel_post_send ( struct ib_device *ibdev, static int arbel_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp, struct io_buffer *iobuf ) { - struct arbel *arbel = ibdev->priv; - struct arbel_queue_pair *arbel_qp - = container_of ( qp, struct arbel_queue_pair, qp ); + struct arbel *arbel = ibdev->dev_priv; + struct arbel_queue_pair *arbel_qp = qp->dev_priv; struct ib_work_queue *wq = &qp->recv; struct arbel_recv_work_queue *arbel_recv_wq = &arbel_qp->recv; struct arbelprm_recv_wqe *wqe; @@ -800,7 +843,7 @@ static int arbel_complete ( struct ib_device *ibdev, union arbelprm_completion_entry *cqe, ib_completer_t complete_send, ib_completer_t complete_recv ) { - struct arbel *arbel = ibdev->priv; + struct arbel *arbel = ibdev->dev_priv; struct ib_completion completion; struct ib_work_queue *wq; struct ib_queue_pair *qp; @@ -842,7 +885,7 @@ static int arbel_complete ( struct ib_device *ibdev, return -EIO; } qp = wq->qp; - arbel_qp = container_of ( qp, struct arbel_queue_pair, qp ); + arbel_qp = qp->dev_priv; /* Identify work queue entry index */ if ( is_send ) { @@ -883,9 +926,8 @@ static void arbel_poll_cq ( struct ib_device *ibdev, struct ib_completion_queue *cq, ib_completer_t complete_send, ib_completer_t complete_recv ) { - struct arbel *arbel = ibdev->priv; - struct arbel_completion_queue *arbel_cq - = container_of ( cq, struct arbel_completion_queue, cq ); + struct arbel *arbel = ibdev->dev_priv; + struct arbel_completion_queue *arbel_cq = cq->dev_priv; struct arbelprm_cq_ci_db_record *ci_db_rec; union arbelprm_completion_entry *cqe; unsigned int cqe_idx_mask; @@ -914,7 +956,7 @@ static void arbel_poll_cq ( struct ib_device *ibdev, /* Update completion queue's index */ cq->next_idx++; /* Update doorbell record */ - ci_db_rec = &arbel->db_rec[arbel_cq->doorbell_idx].cq_ci; + ci_db_rec = &arbel->db_rec[arbel_cq->ci_doorbell_idx].cq_ci; MLX_FILL_1 ( ci_db_rec, 0, counter, ( cq->next_idx & 0xffffffffUL ) ); } @@ -992,20 +1034,20 @@ static int arbel_probe ( struct pci_device *pci, arbel->db_rec = dev_ib_data.uar_context_base; arbel->reserved_lkey = dev_ib_data.mkey; arbel->eqn = dev_ib_data.eq.eqn; - static_ipoib_qp.send.wqe = + static_arbel_ipoib_qp.send.wqe = ( ( struct udqp_st * ) qph )->snd_wq; - static_ipoib_qp.recv.wqe = + static_arbel_ipoib_qp.recv.wqe = ( ( struct udqp_st * ) qph )->rcv_wq; - static_ipoib_send_cq.cqe = + static_arbel_ipoib_send_cq.cqe = ( ( struct cq_st * ) 
ib_data.ipoib_snd_cq )->cq_buf; - static_ipoib_recv_cq.cqe = + static_arbel_ipoib_recv_cq.cqe = ( ( struct cq_st * ) ib_data.ipoib_rcv_cq )->cq_buf; - static_ipoib_qp.qp.qpn = ib_get_qpn ( qph ); - static_ipoib_qp.qp.priv = netdev; - list_add ( &static_ipoib_qp.qp.send.list, - &static_ipoib_send_cq.cq.work_queues ); - list_add ( &static_ipoib_qp.qp.recv.list, - &static_ipoib_recv_cq.cq.work_queues ); + static_ipoib_qp.qpn = ib_get_qpn ( qph ); + static_ipoib_qp.owner_priv = netdev; + list_add ( &static_ipoib_qp.send.list, + &static_ipoib_send_cq.work_queues ); + list_add ( &static_ipoib_qp.recv.list, + &static_ipoib_recv_cq.work_queues ); /* Get device limits */ if ( ( rc = arbel_cmd_query_dev_lim ( arbel, &dev_lim ) ) != 0 ) { @@ -1016,6 +1058,8 @@ static int arbel_probe ( struct pci_device *pci, arbel->limits.reserved_uars = MLX_GET ( &dev_lim, num_rsvd_uars ); arbel->limits.reserved_cqs = ( 1 << MLX_GET ( &dev_lim, log2_rsvd_cqs ) ); + arbel->limits.reserved_qps = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_qps ) ); DBG ( "Device limits:\n "); DBG_HD ( &dev_lim, sizeof ( dev_lim ) ); diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 973c5823..d7f8b4ab 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -89,6 +89,8 @@ struct ib_work_queue { unsigned long next_idx; /** I/O buffers assigned to work queue */ struct io_buffer **iobufs; + /** Device private data */ + void *dev_priv; }; /** An Infiniband Queue Pair */ @@ -99,8 +101,10 @@ struct ib_queue_pair { struct ib_work_queue send; /** Receive queue */ struct ib_work_queue recv; + /** Device private data */ + void *dev_priv; /** Queue owner private data */ - void *priv; + void *owner_priv; }; /** An Infiniband Completion Queue */ @@ -119,6 +123,8 @@ struct ib_completion_queue { unsigned long next_idx; /** List of work queues completing to this queue */ struct list_head work_queues; + /** Device private data */ + void *dev_priv; }; /** An Infiniband completion */ @@ -172,13 +178,11 @@ struct ib_device_operations { * Create completion queue * * @v ibdev Infiniband device - * @v log2_num_cqes Log2 of the number of completion queue entries - * @ret new_cq New completion queue + * @v cq Completion queue * @ret rc Return status code */ int ( * create_cq ) ( struct ib_device *ibdev, - unsigned int log2_num_cqes, - struct ib_completion_queue **new_cq ); + struct ib_completion_queue *cq ); /** * Destroy completion queue * @@ -237,8 +241,10 @@ struct ib_device_operations { /** An Infiniband device */ struct ib_device { - /** Driver private data */ - void *priv; + /** Infiniband operations */ + struct ib_device_operations *op; + /** Device private data */ + void *dev_priv; }; diff --git a/src/net/infiniband.c b/src/net/infiniband.c index 694c88b1..2a29c5b2 100644 --- a/src/net/infiniband.c +++ b/src/net/infiniband.c @@ -17,11 +17,13 @@ */ #include +#include #include #include #include #include #include +#include #include #include #include @@ -33,6 +35,55 @@ * */ +/** + * Create completion queue + * + * @v ibdev Infiniband device + * @v num_cqes Number of completion queue entries + * @ret cq New completion queue + */ +struct ib_completion_queue * ib_create_cq ( struct ib_device *ibdev, + unsigned int num_cqes ) { + struct ib_completion_queue *cq; + int rc; + + DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev ); + + /* Allocate and initialise data structure */ + cq = zalloc ( sizeof ( *cq ) ); + if ( ! 
cq ) + return NULL; + cq->num_cqes = num_cqes; + INIT_LIST_HEAD ( &cq->work_queues ); + + /* Perform device-specific initialisation and get CQN */ + if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not initialise CQ: %s\n", + ibdev, strerror ( rc ) ); + free ( cq ); + return NULL; + } + + DBGC ( ibdev, "IBDEV %p created completion queue %#lx\n", + ibdev, cq->cqn ); + return cq; +} + +/** + * Destroy completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + */ +void ib_destroy_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq ) { + DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n", + ibdev, cq->cqn ); + assert ( list_empty ( &cq->work_queues ) ); + ibdev->op->destroy_cq ( ibdev, cq ); + free ( cq ); +} + /** * Find work queue belonging to completion queue * From 6d15a193aa9e3e4129a885e7010d0d480e723bb8 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 16 Sep 2007 17:36:40 +0100 Subject: [PATCH 41/84] Add fls() for non-constant values. --- src/core/bitops.c | 10 ++++++++++ src/include/strings.h | 6 +----- 2 files changed, 11 insertions(+), 5 deletions(-) create mode 100644 src/core/bitops.c diff --git a/src/core/bitops.c b/src/core/bitops.c new file mode 100644 index 00000000..75d57bf9 --- /dev/null +++ b/src/core/bitops.c @@ -0,0 +1,10 @@ +#include + +int __flsl ( long x ) { + int r = 0; + + for ( r = 0 ; x ; r++ ) { + x >>= 1; + } + return r; +} diff --git a/src/include/strings.h b/src/include/strings.h index a087b1d5..968a7c11 100644 --- a/src/include/strings.h +++ b/src/include/strings.h @@ -40,17 +40,13 @@ __constant_flsl ( unsigned long x ) { return r; } -#define __constant_fls(x) __constant_flsl(x) - /* We don't actually have these functions yet */ -extern int __fls ( int x ); extern int __flsl ( long x ); #define flsl( x ) \ ( __builtin_constant_p ( x ) ? __constant_flsl ( x ) : __flsl ( x ) ) -#define fls( x ) \ - ( __builtin_constant_p ( x ) ? __constant_fls ( x ) : __fls ( x ) ) +#define fls( x ) flsl ( x ) extern int strcasecmp ( const char *s1, const char *s2 ); From 83a6cc8c9bf8972b937a5e57d514da023936466e Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 16 Sep 2007 17:39:29 +0100 Subject: [PATCH 42/84] Don't get stuck in an infinite loop on negative integers! --- src/core/bitops.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/core/bitops.c b/src/core/bitops.c index 75d57bf9..53abaaea 100644 --- a/src/core/bitops.c +++ b/src/core/bitops.c @@ -1,10 +1,11 @@ #include int __flsl ( long x ) { - int r = 0; + unsigned long value = x; + int ls = 0; - for ( r = 0 ; x ; r++ ) { - x >>= 1; + for ( ls = 0 ; value ; ls++ ) { + value >>= 1; } - return r; + return ls; } From 251cc84ed6e10c86a0e97831d8817b993dac13db Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 16 Sep 2007 19:03:24 +0100 Subject: [PATCH 43/84] Started implementing create_qp() and destroy_qp(). 
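
The intended caller-side flow for the new ib_create_qp()/ib_destroy_qp()
wrappers is roughly as follows.  This is only a sketch (the Arbel
create_qp() backend below is still incomplete): it assumes an
already-initialised ibdev, borrows the driver's existing
NUM_IPOIB_SND_WQES/NUM_IPOIB_RCV_WQES constants, and elides error
unwinding.

        struct ib_completion_queue *send_cq, *recv_cq;
        struct ib_queue_pair *qp;

        /* Create completion queues first; ib_create_qp() links the
         * QP's send and receive work queues onto the CQs'
         * work_queues lists. */
        send_cq = ib_create_cq ( ibdev, 32 );
        recv_cq = ib_create_cq ( ibdev, 32 );
        if ( ( ! send_cq ) || ( ! recv_cq ) )
                return -ENOMEM;

        /* Create an unreliable datagram queue pair */
        qp = ib_create_qp ( ibdev, NUM_IPOIB_SND_WQES, send_cq,
                            NUM_IPOIB_RCV_WQES, recv_cq );
        if ( ! qp )
                return -ENOMEM;

        /* ... post work requests and poll the completion queues ... */

        /* Tear down in reverse order */
        ib_destroy_qp ( ibdev, qp );
        ib_destroy_cq ( ibdev, recv_cq );
        ib_destroy_cq ( ibdev, send_cq );
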
--- src/drivers/net/mlx_ipoib/arbel.h | 13 +- src/drivers/net/mlx_ipoib/mt25218.c | 233 +++++++++++++++++++++++----- src/include/gpxe/infiniband.h | 32 +++- src/net/infiniband.c | 72 ++++++++- 4 files changed, 302 insertions(+), 48 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index a1ca21f9..2ef446fa 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -29,6 +29,10 @@ #define ARBEL_HCR_QUERY_DEV_LIM 0x0003 #define ARBEL_HCR_SW2HW_CQ 0x0016 #define ARBEL_HCR_HW2SW_CQ 0x0017 +#define ARBEL_HCR_RST2INIT_QPEE 0x0019 +#define ARBEL_HCR_INIT2RTR_QPEE 0x001a +#define ARBEL_HCR_RTR2RTS_QPEE 0x001b +#define ARBEL_HCR_2RST_QPEE 0x0021 /* * Wrapper structures for hardware datatypes @@ -43,6 +47,7 @@ struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_hca_command_register ); struct MLX_DECLARE_STRUCT ( arbelprm_qp_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_query_dev_lim ); +struct MLX_DECLARE_STRUCT ( arbelprm_queue_pair_ee_context_entry ); struct MLX_DECLARE_STRUCT ( arbelprm_recv_wqe_segment_next ); struct MLX_DECLARE_STRUCT ( arbelprm_send_doorbell ); struct MLX_DECLARE_STRUCT ( arbelprm_ud_address_vector ); @@ -126,6 +131,8 @@ struct arbel_send_work_queue { unsigned int doorbell_idx; /** Work queue entries */ union arbel_send_wqe *wqe; + /** Size of work queue */ + size_t wqe_size; }; /** Alignment of Arbel receive work queue entries */ @@ -143,6 +150,8 @@ struct arbel_recv_work_queue { unsigned int doorbell_idx; /** Work queue entries */ union arbel_recv_wqe *wqe; + /** Size of work queue */ + size_t wqe_size; }; /** Maximum number of allocatable queue pairs @@ -156,8 +165,6 @@ struct arbel_recv_work_queue { /** An Arbel queue pair */ struct arbel_queue_pair { - /** Infiniband queue pair */ - struct ib_queue_pair qp; /** Send work queue */ struct arbel_send_work_queue send; /** Receive work queue */ @@ -178,6 +185,8 @@ struct arbel_completion_queue { unsigned int arm_doorbell_idx; /** Completion queue entries */ union arbelprm_completion_entry *cqe; + /** Size of completion queue */ + size_t cqe_size; }; /** An Arbel resource bitmask */ diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index c466adb6..383689d2 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -464,6 +464,40 @@ arbel_cmd_hw2sw_cq ( struct arbel *arbel, unsigned long cqn ) { 1, NULL, cqn, NULL ); } +static inline int +arbel_cmd_rst2init_qpee ( struct arbel *arbel, unsigned long qpn, + struct arbelprm_queue_pair_ee_context_entry *ctx ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_RST2INIT_QPEE, + 1, sizeof ( *ctx ) ), + 0, ctx, qpn, NULL ); +} + +static inline int +arbel_cmd_init2rtr_qpee ( struct arbel *arbel, unsigned long qpn, + struct arbelprm_queue_pair_ee_context_entry *ctx ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_INIT2RTR_QPEE, + 1, sizeof ( *ctx ) ), + 0, ctx, qpn, NULL ); +} + +static inline int +arbel_cmd_rtr2rts_qpee ( struct arbel *arbel, unsigned long qpn, + struct arbelprm_queue_pair_ee_context_entry *ctx ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_RTR2RTS_QPEE, + 1, sizeof ( *ctx ) ), + 0, ctx, qpn, NULL ); +} + +static inline int +arbel_cmd_2rst_qpee ( struct arbel *arbel, unsigned long qpn ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_2RST_QPEE ), + 0x03, NULL, qpn, NULL ); +} + 
/*************************************************************************** * * Completion queue operations @@ -486,7 +520,6 @@ static int arbel_create_cq ( struct ib_device *ibdev, struct arbelprm_cq_ci_db_record *ci_db_rec; struct arbelprm_cq_arm_db_record *arm_db_rec; int cqn_offset; - size_t cqe_size; unsigned int i; int rc; @@ -509,13 +542,14 @@ static int arbel_create_cq ( struct ib_device *ibdev, arbel_cq->arm_doorbell_idx = arbel_cq_arm_doorbell_idx ( cqn_offset ); /* Allocate completion queue itself */ - cqe_size = ( cq->num_cqes * sizeof ( arbel_cq->cqe[0] ) ); - arbel_cq->cqe = malloc_dma ( cqe_size, sizeof ( arbel_cq->cqe[0] ) ); + arbel_cq->cqe_size = ( cq->num_cqes * sizeof ( arbel_cq->cqe[0] ) ); + arbel_cq->cqe = malloc_dma ( arbel_cq->cqe_size, + sizeof ( arbel_cq->cqe[0] ) ); if ( ! arbel_cq->cqe ) { rc = -ENOMEM; goto err_cqe; } - memset ( arbel_cq->cqe, 0, cqe_size ); + memset ( arbel_cq->cqe, 0, arbel_cq->cqe_size ); for ( i = 0 ; i < cq->num_cqes ; i++ ) { MLX_FILL_1 ( &arbel_cq->cqe[i].normal, 7, owner, 1 ); } @@ -538,11 +572,9 @@ static int arbel_create_cq ( struct ib_device *ibdev, MLX_FILL_1 ( &cqctx, 0, st, 0xa /* "Event fired" */ ); MLX_FILL_1 ( &cqctx, 2, start_address_l, virt_to_bus ( arbel_cq->cqe ) ); -#if 0 MLX_FILL_2 ( &cqctx, 3, usr_page, arbel->limits.reserved_uars, - log_cq_size, log2_num_cqes ); -#endif + log_cq_size, ( fls ( cq->num_cqes ) - 1 ) ); MLX_FILL_1 ( &cqctx, 5, c_eqn, arbel->eqn ); MLX_FILL_1 ( &cqctx, 6, pd, ARBEL_GLOBAL_PD ); MLX_FILL_1 ( &cqctx, 7, l_key, arbel->reserved_lkey ); @@ -554,16 +586,16 @@ static int arbel_create_cq ( struct ib_device *ibdev, if ( ( rc = arbel_cmd_sw2hw_cq ( arbel, cq->cqn, &cqctx ) ) != 0 ) { DBGC ( arbel, "Arbel %p SW2HW_CQ failed: %s\n", arbel, strerror ( rc ) ); - goto err_sw2hw; + goto err_sw2hw_cq; } cq->dev_priv = arbel_cq; return 0; - err_sw2hw: + err_sw2hw_cq: MLX_FILL_1 ( ci_db_rec, 1, res, ARBEL_UAR_RES_NONE ); MLX_FILL_1 ( arm_db_rec, 1, res, ARBEL_UAR_RES_NONE ); - free_dma ( arbel_cq->cqe, cqe_size ); + free_dma ( arbel_cq->cqe, arbel_cq->cqe_size ); err_cqe: free ( arbel_cq ); err_arbel_cq: @@ -585,35 +617,31 @@ static void arbel_destroy_cq ( struct ib_device *ibdev, struct arbelprm_cq_ci_db_record *ci_db_rec; struct arbelprm_cq_arm_db_record *arm_db_rec; int cqn_offset; - size_t cqe_size; - unsigned int ci_doorbell_idx; - unsigned int arm_doorbell_idx; int rc; - assert ( list_empty ( &cq->work_queues ) ); - /* Take ownership back from hardware */ if ( ( rc = arbel_cmd_hw2sw_cq ( arbel, cq->cqn ) ) != 0 ) { - DBGC ( arbel, "Arbel %p FATAL HW2SW_CQ failed: %s\n", - arbel, strerror ( rc ) ); + DBGC ( arbel, "Arbel %p FATAL HW2SW_CQ failed on CQN %#lx: " + "%s\n", arbel, cq->cqn, strerror ( rc ) ); /* Leak memory and return; at least we avoid corruption */ return; } /* Clear doorbell records */ - cqn_offset = ( cq->cqn - arbel->limits.reserved_cqs ); - ci_doorbell_idx = arbel_cq_ci_doorbell_idx ( cqn_offset ); - arm_doorbell_idx = arbel_cq_arm_doorbell_idx ( cqn_offset ); - ci_db_rec = &arbel->db_rec[ci_doorbell_idx].cq_ci; - arm_db_rec = &arbel->db_rec[arm_doorbell_idx].cq_arm; + ci_db_rec = &arbel->db_rec[arbel_cq->ci_doorbell_idx].cq_ci; + arm_db_rec = &arbel->db_rec[arbel_cq->arm_doorbell_idx].cq_arm; MLX_FILL_1 ( ci_db_rec, 1, res, ARBEL_UAR_RES_NONE ); MLX_FILL_1 ( arm_db_rec, 1, res, ARBEL_UAR_RES_NONE ); /* Free memory */ - cqe_size = ( cq->num_cqes * sizeof ( arbel_cq->cqe[0] ) ); - free_dma ( arbel_cq->cqe, cqe_size ); + free_dma ( arbel_cq->cqe, arbel_cq->cqe_size ); free ( arbel_cq ); 
+ + /* Mark queue number as free */ + cqn_offset = ( cq->cqn - arbel->limits.reserved_cqs ); arbel_free_qn_offset ( arbel->cq_inuse, cqn_offset ); + + cq->dev_priv = NULL; } /*************************************************************************** @@ -623,22 +651,50 @@ static void arbel_destroy_cq ( struct ib_device *ibdev, *************************************************************************** */ +static int arbel_create_send_wq ( struct arbel_send_work_queue *arbel_send_wq, + unsigned int num_wqes ) { + + arbel_send_wq->wqe_size = ( num_wqes * + sizeof ( arbel_send_wq->wqe[0] ) ); + arbel_send_wq->wqe = malloc_dma ( arbel_send_wq->wqe_size, + sizeof ( arbel_send_wq->wqe[0] ) ); + if ( ! arbel_send_wq->wqe ) + return -ENOMEM; + + // initialise (prelink?) +} + +static int arbel_create_recv_wq ( struct arbel_recv_work_queue *arbel_recv_wq, + unsigned int num_wqes ) { + + arbel_recv_wq->wqe_size = ( num_wqes * + sizeof ( arbel_recv_wq->wqe[0] ) ); + arbel_recv_wq->wqe = malloc_dma ( arbel_recv_wq->wqe_size, + sizeof ( arbel_recv_wq->wqe[0] ) ); + if ( ! arbel_recv_wq->wqe ) + return -ENOMEM; + + // initialise (prelink?) +} + + + + +/** + * Create queue pair + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @ret rc Return status code + */ static int arbel_create_qp ( struct ib_device *ibdev, - unsigned int log2_num_send_wqes, - struct ib_completion_queue *send_cq, - unsigned int log2_num_recv_wqes, - struct ib_completion_queue *recv_cq, - struct ib_queue_pair **new_qp ) { + struct ib_queue_pair *qp ) { struct arbel *arbel = ibdev->dev_priv; struct arbel_queue_pair *arbel_qp; + struct arbelprm_queue_pair_ee_context_entry qpctx; struct arbelprm_qp_db_record *send_db_rec; struct arbelprm_qp_db_record *recv_db_rec; int qpn_offset; - unsigned int qpn; - unsigned int num_send_wqes; - unsigned int num_recv_wqes; - unsigned int send_doorbell_idx; - unsigned int recv_doorbell_idx; int rc; /* Find a free queue pair number */ @@ -648,21 +704,117 @@ static int arbel_create_qp ( struct ib_device *ibdev, rc = qpn_offset; goto err_qpn_offset; } - qpn = ( ARBEL_QPN_BASE + arbel->limits.reserved_qps + qpn_offset ); - send_doorbell_idx = arbel_send_doorbell_idx ( qpn_offset ); - recv_doorbell_idx = arbel_recv_doorbell_idx ( qpn_offset ); + qp->qpn = ( ARBEL_QPN_BASE + arbel->limits.reserved_qps + qpn_offset ); /* Allocate control structures */ - num_send_wqes = ( 1 << log2_num_send_wqes ); - num_recv_wqes = ( 1 << log2_num_recv_wqes ); arbel_qp = zalloc ( sizeof ( *arbel_qp ) ); + if ( ! arbel_qp ) { + rc = -ENOMEM; + goto err_arbel_qp; + } + arbel_qp->send.doorbell_idx = arbel_send_doorbell_idx ( qpn_offset ); + arbel_qp->recv.doorbell_idx = arbel_recv_doorbell_idx ( qpn_offset ); + /* Create send and receive work queues */ + if ( ( rc = arbel_create_send_wq ( &arbel_qp->send, + qp->send.num_wqes ) ) != 0 ) + goto err_create_send_wq; + if ( ( rc = arbel_create_recv_wq ( &arbel_qp->recv, + qp->recv.num_wqes ) ) != 0 ) + goto err_create_recv_wq; + + /* Initialise doorbell records */ + send_db_rec = &arbel->db_rec[arbel_qp->send.doorbell_idx].qp; + MLX_FILL_1 ( send_db_rec, 0, counter, 0 ); + MLX_FILL_2 ( send_db_rec, 1, + res, ARBEL_UAR_RES_SQ, + qp_number, qp->qpn ); + recv_db_rec = &arbel->db_rec[arbel_qp->recv.doorbell_idx].qp; + MLX_FILL_1 ( recv_db_rec, 0, counter, 0 ); + MLX_FILL_2 ( recv_db_rec, 1, + res, ARBEL_UAR_RES_RQ, + qp_number, qp->qpn ); + + /* Hand queue over to hardware */ + memset ( &qpctx, 0, sizeof ( qpctx ) ); + // ... 
fill in context + if ( ( rc = arbel_cmd_rst2init_qpee ( arbel, qp->qpn, &qpctx )) != 0 ){ + DBGC ( arbel, "Arbel %p RST2INIT_QPEE failed: %s\n", + arbel, strerror ( rc ) ); + goto err_rst2init_qpee; + } + if ( ( rc = arbel_cmd_init2rtr_qpee ( arbel, qp->qpn, &qpctx )) != 0 ){ + DBGC ( arbel, "Arbel %p INIT2RTR_QPEE failed: %s\n", + arbel, strerror ( rc ) ); + goto err_init2rtr_qpee; + } + if ( ( rc = arbel_cmd_rtr2rts_qpee ( arbel, qp->qpn, &qpctx ) ) != 0 ){ + DBGC ( arbel, "Arbel %p RTR2RTS_QPEE failed: %s\n", + arbel, strerror ( rc ) ); + goto err_rtr2rts_qpee; + } + + qp->dev_priv = arbel_qp; return 0; + err_rtr2rts_qpee: + err_init2rtr_qpee: + arbel_cmd_2rst_qpee ( arbel, qp->qpn ); + err_rst2init_qpee: + MLX_FILL_1 ( send_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + MLX_FILL_1 ( recv_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + free_dma ( arbel_qp->recv.wqe, arbel_qp->recv.wqe_size ); + err_create_recv_wq: + free_dma ( arbel_qp->send.wqe, arbel_qp->send.wqe_size ); + err_create_send_wq: + free ( arbel_qp ); + err_arbel_qp: + arbel_free_qn_offset ( arbel->qp_inuse, qpn_offset ); err_qpn_offset: return rc; } +/** + * Destroy queue pair + * + * @v ibdev Infiniband device + * @v qp Queue pair + */ +static void arbel_destroy_qp ( struct ib_device *ibdev, + struct ib_queue_pair *qp ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbel_queue_pair *arbel_qp = qp->dev_priv; + struct arbelprm_qp_db_record *send_db_rec; + struct arbelprm_qp_db_record *recv_db_rec; + int qpn_offset; + int rc; + + /* Take ownership back from hardware */ + if ( ( rc = arbel_cmd_2rst_qpee ( arbel, qp->qpn ) ) != 0 ) { + DBGC ( arbel, "Arbel %p FATAL 2RST_QPEE failed on QPN %#lx: " + "%s\n", arbel, qp->qpn, strerror ( rc ) ); + /* Leak memory and return; at least we avoid corruption */ + return; + } + + /* Clear doorbell records */ + send_db_rec = &arbel->db_rec[arbel_qp->send.doorbell_idx].qp; + recv_db_rec = &arbel->db_rec[arbel_qp->recv.doorbell_idx].qp; + MLX_FILL_1 ( send_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + MLX_FILL_1 ( recv_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + + /* Free memory */ + free_dma ( arbel_qp->send.wqe, arbel_qp->send.wqe_size ); + free_dma ( arbel_qp->recv.wqe, arbel_qp->recv.wqe_size ); + free ( arbel_qp ); + + /* Mark queue number as free */ + qpn_offset = ( qp->qpn - ARBEL_QPN_BASE - arbel->limits.reserved_qps ); + arbel_free_qn_offset ( arbel->qp_inuse, qpn_offset ); + + qp->dev_priv = NULL; +} + /*************************************************************************** * * Work request operations @@ -966,6 +1118,8 @@ static void arbel_poll_cq ( struct ib_device *ibdev, static struct ib_device_operations arbel_ib_operations = { .create_cq = arbel_create_cq, .destroy_cq = arbel_destroy_cq, + .create_qp = arbel_create_qp, + .destroy_qp = arbel_destroy_qp, .post_send = arbel_post_send, .post_recv = arbel_post_recv, .poll_cq = arbel_poll_cq, @@ -1048,6 +1202,7 @@ static int arbel_probe ( struct pci_device *pci, &static_ipoib_send_cq.work_queues ); list_add ( &static_ipoib_qp.recv.list, &static_ipoib_recv_cq.work_queues ); + static_ibdev.op = &arbel_ib_operations; /* Get device limits */ if ( ( rc = arbel_cmd_query_dev_lim ( arbel, &dev_lim ) ) != 0 ) { diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index d7f8b4ab..4868f717 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -174,8 +174,7 @@ struct ib_address_vector { * These represent a subset of the Infiniband Verbs. 
*/ struct ib_device_operations { - /** - * Create completion queue + /** Create completion queue * * @v ibdev Infiniband device * @v cq Completion queue @@ -183,14 +182,28 @@ struct ib_device_operations { */ int ( * create_cq ) ( struct ib_device *ibdev, struct ib_completion_queue *cq ); - /** - * Destroy completion queue + /** Destroy completion queue * * @v ibdev Infiniband device * @v cq Completion queue */ void ( * destroy_cq ) ( struct ib_device *ibdev, struct ib_completion_queue *cq ); + /** Create queue pair + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @ret rc Return status code + */ + int ( * create_qp ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp ); + /** Destroy queue pair + * + * @v ibdev Infiniband device + * @v qp Queue pair + */ + void ( * destroy_qp ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp ); /** Post send work queue entry * * @v ibdev Infiniband device @@ -247,7 +260,16 @@ struct ib_device { void *dev_priv; }; - +extern struct ib_completion_queue * ib_create_cq ( struct ib_device *ibdev, + unsigned int num_cqes ); +extern void ib_destroy_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq ); +extern struct ib_queue_pair * +ib_create_qp ( struct ib_device *ibdev, unsigned int num_send_wqes, + struct ib_completion_queue *send_cq, unsigned int num_recv_wqes, + struct ib_completion_queue *recv_cq ); +extern void ib_destroy_qp ( struct ib_device *ibdev, + struct ib_queue_pair *qp ); extern struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, unsigned long qpn, int is_send ); diff --git a/src/net/infiniband.c b/src/net/infiniband.c index 2a29c5b2..9a0692ee 100644 --- a/src/net/infiniband.c +++ b/src/net/infiniband.c @@ -58,8 +58,8 @@ struct ib_completion_queue * ib_create_cq ( struct ib_device *ibdev, /* Perform device-specific initialisation and get CQN */ if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) { - DBGC ( ibdev, "IBDEV %p could not initialise CQ: %s\n", - ibdev, strerror ( rc ) ); + DBGC ( ibdev, "IBDEV %p could not initialise completion " + "queue: %s\n", ibdev, strerror ( rc ) ); free ( cq ); return NULL; } @@ -84,6 +84,74 @@ void ib_destroy_cq ( struct ib_device *ibdev, free ( cq ); } +/** + * Create queue pair + * + * @v ibdev Infiniband device + * @v num_send_wqes Number of send work queue entries + * @v send_cq Send completion queue + * @v num_recv_wqes Number of receive work queue entries + * @v recv_cq Receive completion queue + * @ret qp Queue pair + */ +struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, + unsigned int num_send_wqes, + struct ib_completion_queue *send_cq, + unsigned int num_recv_wqes, + struct ib_completion_queue *recv_cq ) { + struct ib_queue_pair *qp; + int rc; + + DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev ); + + /* Allocate and initialise data structure */ + qp = zalloc ( sizeof ( *qp ) + + ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) + + ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) ); + if ( ! 
qp ) + return NULL; + qp->send.qp = qp; + qp->send.is_send = 1; + qp->send.cq = send_cq; + list_add ( &qp->send.list, &send_cq->work_queues ); + qp->send.num_wqes = num_send_wqes; + qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) ); + qp->recv.qp = qp; + qp->recv.cq = recv_cq; + list_add ( &qp->recv.list, &recv_cq->work_queues ); + qp->recv.num_wqes = num_recv_wqes; + qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) + + ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) )); + + /* Perform device-specific initialisation and get QPN */ + if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) { + DBGC ( ibdev, "IBDEV %p could not initialise queue pair: " + "%s\n", ibdev, strerror ( rc ) ); + free ( qp ); + return NULL; + } + + DBGC ( ibdev, "IBDEV %p created queue pair %#lx\n", + ibdev, qp->qpn ); + return qp; +} + +/** + * Destroy queue pair + * + * @v ibdev Infiniband device + * @v qp Queue pair + */ +void ib_destroy_qp ( struct ib_device *ibdev, + struct ib_queue_pair *qp ) { + DBGC ( ibdev, "IBDEV %p destroying queue pair %#lx\n", + ibdev, qp->qpn ); + ibdev->op->destroy_qp ( ibdev, qp ); + free ( qp ); +} + + + /** * Find work queue belonging to completion queue * From 7e85f0d296f1ef908a6eb521f630b396108ffef9 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 16 Sep 2007 20:54:21 +0100 Subject: [PATCH 44/84] create_qp() and destroy_qp() now written (but not tested). --- src/drivers/net/mlx_ipoib/arbel.h | 7 ++ src/drivers/net/mlx_ipoib/bit_ops.h | 23 ++++++ src/drivers/net/mlx_ipoib/ib_driver.h | 2 +- src/drivers/net/mlx_ipoib/mt25218.c | 108 ++++++++++++++++++++++---- src/include/gpxe/infiniband.h | 4 +- src/net/infiniband.c | 5 +- 6 files changed, 133 insertions(+), 16 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index 2ef446fa..47380ded 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -34,6 +34,12 @@ #define ARBEL_HCR_RTR2RTS_QPEE 0x001b #define ARBEL_HCR_2RST_QPEE 0x0021 +/* Service types */ +#define ARBEL_ST_UD 0x01 + +/* MTUs */ +#define ARBEL_MTU_2048 0x04 + /* * Wrapper structures for hardware datatypes * @@ -46,6 +52,7 @@ struct MLX_DECLARE_STRUCT ( arbelprm_cq_arm_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_hca_command_register ); struct MLX_DECLARE_STRUCT ( arbelprm_qp_db_record ); +struct MLX_DECLARE_STRUCT ( arbelprm_qp_ee_state_transitions ); struct MLX_DECLARE_STRUCT ( arbelprm_query_dev_lim ); struct MLX_DECLARE_STRUCT ( arbelprm_queue_pair_ee_context_entry ); struct MLX_DECLARE_STRUCT ( arbelprm_recv_wqe_segment_next ); diff --git a/src/drivers/net/mlx_ipoib/bit_ops.h b/src/drivers/net/mlx_ipoib/bit_ops.h index 960d0668..8b81bfcc 100644 --- a/src/drivers/net/mlx_ipoib/bit_ops.h +++ b/src/drivers/net/mlx_ipoib/bit_ops.h @@ -204,6 +204,14 @@ struct addr_64_st { ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ MLX_ASSEMBLE_3 ( _structure_st, _index, __VA_ARGS__ ) ) +#define MLX_ASSEMBLE_5( _structure_st, _index, _field, _value, ... ) \ + ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ + MLX_ASSEMBLE_4 ( _structure_st, _index, __VA_ARGS__ ) ) + +#define MLX_ASSEMBLE_6( _structure_st, _index, _field, _value, ... 
) \ + ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ + MLX_ASSEMBLE_5 ( _structure_st, _index, __VA_ARGS__ ) ) + /* * Build native-endian (positive) dword bitmasks from named fields * @@ -225,6 +233,14 @@ struct addr_64_st { ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ MLX_MASK_3 ( _structure_st, _index, __VA_ARGS__ ) ) +#define MLX_MASK_5( _structure_st, _index, _field, ... ) \ + ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ + MLX_MASK_4 ( _structure_st, _index, __VA_ARGS__ ) ) + +#define MLX_MASK_6( _structure_st, _index, _field, ... ) \ + ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ + MLX_MASK_5 ( _structure_st, _index, __VA_ARGS__ ) ) + /* * Populate big-endian dwords from named fields and values * @@ -253,6 +269,13 @@ struct addr_64_st { MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_4 ( MLX_PSEUDO_STRUCT ( _ptr ),\ _index, __VA_ARGS__ ) ) +#define MLX_FILL_5( _ptr, _index, ... ) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_5 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) + +#define MLX_FILL_6( _ptr, _index, ... ) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_6 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) /* * Modify big-endian dword using named field and value diff --git a/src/drivers/net/mlx_ipoib/ib_driver.h b/src/drivers/net/mlx_ipoib/ib_driver.h index 57c02820..5ee46534 100644 --- a/src/drivers/net/mlx_ipoib/ib_driver.h +++ b/src/drivers/net/mlx_ipoib/ib_driver.h @@ -55,7 +55,7 @@ enum { }; enum { - MADS_SND_CQN_SN, + MADS_SND_CQN_SN = 4, MADS_RCV_CQN_SN, IPOIB_SND_CQN_SN, IPOIB_RCV_CQN_SN, diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 383689d2..8d6020db 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -466,7 +466,7 @@ arbel_cmd_hw2sw_cq ( struct arbel *arbel, unsigned long cqn ) { static inline int arbel_cmd_rst2init_qpee ( struct arbel *arbel, unsigned long qpn, - struct arbelprm_queue_pair_ee_context_entry *ctx ) { + const struct arbelprm_qp_ee_state_transitions *ctx ){ return arbel_cmd ( arbel, ARBEL_HCR_IN_CMD ( ARBEL_HCR_RST2INIT_QPEE, 1, sizeof ( *ctx ) ), @@ -475,7 +475,7 @@ arbel_cmd_rst2init_qpee ( struct arbel *arbel, unsigned long qpn, static inline int arbel_cmd_init2rtr_qpee ( struct arbel *arbel, unsigned long qpn, - struct arbelprm_queue_pair_ee_context_entry *ctx ) { + const struct arbelprm_qp_ee_state_transitions *ctx ){ return arbel_cmd ( arbel, ARBEL_HCR_IN_CMD ( ARBEL_HCR_INIT2RTR_QPEE, 1, sizeof ( *ctx ) ), @@ -484,7 +484,7 @@ arbel_cmd_init2rtr_qpee ( struct arbel *arbel, unsigned long qpn, static inline int arbel_cmd_rtr2rts_qpee ( struct arbel *arbel, unsigned long qpn, - struct arbelprm_queue_pair_ee_context_entry *ctx ) { + const struct arbelprm_qp_ee_state_transitions *ctx ) { return arbel_cmd ( arbel, ARBEL_HCR_IN_CMD ( ARBEL_HCR_RTR2RTS_QPEE, 1, sizeof ( *ctx ) ), @@ -574,7 +574,7 @@ static int arbel_create_cq ( struct ib_device *ibdev, virt_to_bus ( arbel_cq->cqe ) ); MLX_FILL_2 ( &cqctx, 3, usr_page, arbel->limits.reserved_uars, - log_cq_size, ( fls ( cq->num_cqes ) - 1 ) ); + log_cq_size, fls ( cq->num_cqes - 1 ) ); MLX_FILL_1 ( &cqctx, 5, c_eqn, arbel->eqn ); MLX_FILL_1 ( &cqctx, 6, pd, ARBEL_GLOBAL_PD ); MLX_FILL_1 ( &cqctx, 7, l_key, arbel->reserved_lkey ); @@ -651,35 +651,76 @@ static void arbel_destroy_cq ( struct ib_device *ibdev, *************************************************************************** */ +/** + * Create send work queue + * + * @v arbel_send_wq Send work queue + * @v num_wqes Number of work 
queue entries + * @ret rc Return status code + */ static int arbel_create_send_wq ( struct arbel_send_work_queue *arbel_send_wq, unsigned int num_wqes ) { + struct arbelprm_ud_send_wqe *wqe; + struct arbelprm_ud_send_wqe *next_wqe; + unsigned int wqe_idx_mask; + unsigned int i; + /* Allocate work queue */ arbel_send_wq->wqe_size = ( num_wqes * sizeof ( arbel_send_wq->wqe[0] ) ); arbel_send_wq->wqe = malloc_dma ( arbel_send_wq->wqe_size, sizeof ( arbel_send_wq->wqe[0] ) ); if ( ! arbel_send_wq->wqe ) return -ENOMEM; + memset ( arbel_send_wq->wqe, 0, arbel_send_wq->wqe_size ); - // initialise (prelink?) + /* Link work queue entries */ + wqe_idx_mask = ( num_wqes - 1 ); + for ( i = 0 ; i < num_wqes ; i++ ) { + wqe = &arbel_send_wq->wqe[i].ud; + next_wqe = &arbel_send_wq->wqe[ ( i + 1 ) & wqe_idx_mask ].ud; + MLX_FILL_1 ( &wqe->next, 0, nda_31_6, + ( virt_to_bus ( next_wqe ) >> 6 ) ); + } + + return 0; } +/** + * Create receive work queue + * + * @v arbel_recv_wq Receive work queue + * @v num_wqes Number of work queue entries + * @ret rc Return status code + */ static int arbel_create_recv_wq ( struct arbel_recv_work_queue *arbel_recv_wq, unsigned int num_wqes ) { + struct arbelprm_recv_wqe *wqe; + struct arbelprm_recv_wqe *next_wqe; + unsigned int wqe_idx_mask; + unsigned int i; + /* Allocate work queue */ arbel_recv_wq->wqe_size = ( num_wqes * sizeof ( arbel_recv_wq->wqe[0] ) ); arbel_recv_wq->wqe = malloc_dma ( arbel_recv_wq->wqe_size, sizeof ( arbel_recv_wq->wqe[0] ) ); if ( ! arbel_recv_wq->wqe ) return -ENOMEM; + memset ( arbel_recv_wq->wqe, 0, arbel_recv_wq->wqe_size ); - // initialise (prelink?) + /* Link work queue entries */ + wqe_idx_mask = ( num_wqes - 1 ); + for ( i = 0 ; i < num_wqes ; i++ ) { + wqe = &arbel_recv_wq->wqe[i].recv; + next_wqe = &arbel_recv_wq->wqe[( i + 1 ) & wqe_idx_mask].recv; + MLX_FILL_1 ( &wqe->next, 0, nda_31_6, + ( virt_to_bus ( next_wqe ) >> 6 ) ); + } + + return 0; } - - - /** * Create queue pair * @@ -691,7 +732,7 @@ static int arbel_create_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) { struct arbel *arbel = ibdev->dev_priv; struct arbel_queue_pair *arbel_qp; - struct arbelprm_queue_pair_ee_context_entry qpctx; + struct arbelprm_qp_ee_state_transitions qpctx; struct arbelprm_qp_db_record *send_db_rec; struct arbelprm_qp_db_record *recv_db_rec; int qpn_offset; @@ -737,17 +778,53 @@ static int arbel_create_qp ( struct ib_device *ibdev, /* Hand queue over to hardware */ memset ( &qpctx, 0, sizeof ( qpctx ) ); - // ... 
fill in context + MLX_FILL_3 ( &qpctx, 2, + qpc_eec_data.de, 1, + qpc_eec_data.pm_state, 0x03 /* Always 0x03 for UD */, + qpc_eec_data.st, ARBEL_ST_UD ); + MLX_FILL_6 ( &qpctx, 4, + qpc_eec_data.mtu, ARBEL_MTU_2048, + qpc_eec_data.msg_max, 11 /* 2^11 = 2048 */, + qpc_eec_data.log_rq_size, fls ( qp->recv.num_wqes - 1 ), + qpc_eec_data.log_rq_stride, + ( fls ( sizeof ( arbel_qp->send.wqe[0] ) - 1 ) - 4 ), + qpc_eec_data.log_sq_size, fls ( qp->send.num_wqes - 1 ), + qpc_eec_data.log_sq_stride, + ( fls ( sizeof ( arbel_qp->recv.wqe[0] ) - 1 ) - 4 ) ); + MLX_FILL_1 ( &qpctx, 5, + qpc_eec_data.usr_page, arbel->limits.reserved_uars ); + MLX_FILL_1 ( &qpctx, 10, qpc_eec_data.primary_address_path.port_number, + PXE_IB_PORT ); + MLX_FILL_1 ( &qpctx, 27, qpc_eec_data.pd, ARBEL_GLOBAL_PD ); + MLX_FILL_1 ( &qpctx, 29, qpc_eec_data.wqe_lkey, arbel->reserved_lkey ); + MLX_FILL_1 ( &qpctx, 30, qpc_eec_data.ssc, 1 ); + MLX_FILL_1 ( &qpctx, 33, qpc_eec_data.cqn_snd, qp->send.cq->cqn ); + MLX_FILL_1 ( &qpctx, 34, qpc_eec_data.snd_wqe_base_adr_l, + ( virt_to_bus ( arbel_qp->send.wqe ) >> 6 ) ); + MLX_FILL_1 ( &qpctx, 35, qpc_eec_data.snd_db_record_index, + arbel_qp->send.doorbell_idx ); + MLX_FILL_1 ( &qpctx, 38, qpc_eec_data.rsc, 1 ); + MLX_FILL_1 ( &qpctx, 41, qpc_eec_data.cqn_rcv, qp->recv.cq->cqn ); + MLX_FILL_1 ( &qpctx, 42, qpc_eec_data.rcv_wqe_base_adr_l, + ( virt_to_bus ( arbel_qp->recv.wqe ) >> 6 ) ); + MLX_FILL_1 ( &qpctx, 43, qpc_eec_data.rcv_db_record_index, + arbel_qp->recv.doorbell_idx ); + MLX_FILL_1 ( &qpctx, 44, qpc_eec_data.q_key, qp->qkey ); if ( ( rc = arbel_cmd_rst2init_qpee ( arbel, qp->qpn, &qpctx )) != 0 ){ DBGC ( arbel, "Arbel %p RST2INIT_QPEE failed: %s\n", arbel, strerror ( rc ) ); goto err_rst2init_qpee; } + memset ( &qpctx, 0, sizeof ( qpctx ) ); + MLX_FILL_2 ( &qpctx, 4, + qpc_eec_data.mtu, ARBEL_MTU_2048, + qpc_eec_data.msg_max, 11 /* 2^11 = 2048 */ ); if ( ( rc = arbel_cmd_init2rtr_qpee ( arbel, qp->qpn, &qpctx )) != 0 ){ DBGC ( arbel, "Arbel %p INIT2RTR_QPEE failed: %s\n", arbel, strerror ( rc ) ); goto err_init2rtr_qpee; } + memset ( &qpctx, 0, sizeof ( qpctx ) ); if ( ( rc = arbel_cmd_rtr2rts_qpee ( arbel, qp->qpn, &qpctx ) ) != 0 ){ DBGC ( arbel, "Arbel %p RTR2RTS_QPEE failed: %s\n", arbel, strerror ( rc ) ); @@ -1215,8 +1292,13 @@ static int arbel_probe ( struct pci_device *pci, ( 1 << MLX_GET ( &dev_lim, log2_rsvd_cqs ) ); arbel->limits.reserved_qps = ( 1 << MLX_GET ( &dev_lim, log2_rsvd_qps ) ); - DBG ( "Device limits:\n "); - DBG_HD ( &dev_lim, sizeof ( dev_lim ) ); + + DBG ( "MADS SND CQN = %#lx\n", dev_ib_data.mads_qp.snd_cq.cqn ); + struct ib_completion_queue *test_cq; + test_cq = ib_create_cq ( &static_ibdev, 32 ); + if ( test_cq ) { + DBG ( "Woot: create_cq() passed!\n" ); + } /* Register network device */ if ( ( rc = register_netdev ( netdev ) ) != 0 ) diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 4868f717..632a214e 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -97,6 +97,8 @@ struct ib_work_queue { struct ib_queue_pair { /** Queue Pair Number */ unsigned long qpn; + /** Queue key */ + unsigned long qkey; /** Send queue */ struct ib_work_queue send; /** Receive queue */ @@ -267,7 +269,7 @@ extern void ib_destroy_cq ( struct ib_device *ibdev, extern struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, unsigned int num_send_wqes, struct ib_completion_queue *send_cq, unsigned int num_recv_wqes, - struct ib_completion_queue *recv_cq ); + struct ib_completion_queue *recv_cq, unsigned long 
qkey ); extern void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ); extern struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, diff --git a/src/net/infiniband.c b/src/net/infiniband.c index 9a0692ee..a9ca0e31 100644 --- a/src/net/infiniband.c +++ b/src/net/infiniband.c @@ -92,13 +92,15 @@ void ib_destroy_cq ( struct ib_device *ibdev, * @v send_cq Send completion queue * @v num_recv_wqes Number of receive work queue entries * @v recv_cq Receive completion queue + * @v qkey Queue key * @ret qp Queue pair */ struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, unsigned int num_send_wqes, struct ib_completion_queue *send_cq, unsigned int num_recv_wqes, - struct ib_completion_queue *recv_cq ) { + struct ib_completion_queue *recv_cq, + unsigned long qkey ) { struct ib_queue_pair *qp; int rc; @@ -110,6 +112,7 @@ struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) ); if ( ! qp ) return NULL; + qp->qkey = qkey; qp->send.qp = qp; qp->send.is_send = 1; qp->send.cq = send_cq; From 4ddb6570f838d99ecce3bd051a33a39585d72226 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 16 Sep 2007 22:47:45 +0100 Subject: [PATCH 45/84] Almost working with own-queue allocation. --- src/drivers/net/mlx_ipoib/arbel.h | 4 +- src/drivers/net/mlx_ipoib/cmdif_comm.c | 24 ++++++ src/drivers/net/mlx_ipoib/ib_driver.c | 19 +++++ src/drivers/net/mlx_ipoib/ib_driver.h | 6 +- src/drivers/net/mlx_ipoib/ib_mt25218.c | 16 ++++ src/drivers/net/mlx_ipoib/mt25218.c | 108 +++++++++++++++++++++++-- 6 files changed, 166 insertions(+), 11 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index 47380ded..cd6a48eb 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -35,11 +35,13 @@ #define ARBEL_HCR_2RST_QPEE 0x0021 /* Service types */ -#define ARBEL_ST_UD 0x01 +#define ARBEL_ST_UD 0x03 /* MTUs */ #define ARBEL_MTU_2048 0x04 +#define ARBEL_INVALID_LKEY 0x00000100UL + /* * Wrapper structures for hardware datatypes * diff --git a/src/drivers/net/mlx_ipoib/cmdif_comm.c b/src/drivers/net/mlx_ipoib/cmdif_comm.c index cf7b9e55..97f288bc 100644 --- a/src/drivers/net/mlx_ipoib/cmdif_comm.c +++ b/src/drivers/net/mlx_ipoib/cmdif_comm.c @@ -112,6 +112,8 @@ static XHH_cmd_status_t cmd_invoke(command_fields_t * cmd_prms) __u32 hcr[7], data; __u8 status; + DBG ( "Executing command:\n" ); + /* check if go bit is free */ ret = cmdif_is_free(&is_free); if (ret) { @@ -129,6 +131,17 @@ static XHH_cmd_status_t cmd_invoke(command_fields_t * cmd_prms) edit_hcr(cmd_prms, hcr); __asm__ __volatile__("":::"memory"); + DBG_HD ( &hcr[0], sizeof ( hcr ) ); + if ( cmd_prms->in_trans == TRANS_MAILBOX ) { + size_t size = ( 4 * cmd_prms->in_param_size ); + if ( size > 256 ) + size = 256; +#if ! CREATE_OWN + DBG ( "Input mailbox:\n" ); + DBG_HD ( &cmd_prms->in_param[0], size ); +#endif + } + for (i = 0; i < 7; ++i) { ret = gw_write_cr(HCR_BASE + i * 4, hcr[i]); if (ret) { @@ -168,6 +181,17 @@ static XHH_cmd_status_t cmd_invoke(command_fields_t * cmd_prms) return -1; } + if ( cmd_prms->out_trans == TRANS_MAILBOX ) { + size_t size = ( 4 * cmd_prms->out_param_size ); + if ( size > 256 ) + size = 256; +#if ! 
CREATE_OWN + DBG ( "Output mailbox:\n" ); + DBG_HD ( &cmd_prms->out_param[0], size ); +#endif + } + DBG ( "Command executed successfully\n" ); + return 0; } diff --git a/src/drivers/net/mlx_ipoib/ib_driver.c b/src/drivers/net/mlx_ipoib/ib_driver.c index a3015ba2..590fb94d 100644 --- a/src/drivers/net/mlx_ipoib/ib_driver.c +++ b/src/drivers/net/mlx_ipoib/ib_driver.c @@ -62,6 +62,8 @@ static int wait_logic_link_up(__u8 port) return 0; } +unsigned long ipoib_qkey; + static int ib_driver_init(struct pci_device *pci, udqp_t * ipoib_qph_p) { int rc; @@ -147,6 +149,9 @@ static int ib_driver_init(struct pci_device *pci, udqp_t * ipoib_qph_p) qkey, mlid); } + ipoib_qkey = qkey; + +#if 0 rc = create_ipoib_qp(&ib_data.ipoib_qp, &ib_data.ipoib_snd_cq, &ib_data.ipoib_rcv_cq, qkey); @@ -166,6 +171,7 @@ static int ib_driver_init(struct pci_device *pci, udqp_t * ipoib_qph_p) } else { tprintf("add_qp_to_mcast_group() success"); } +#endif /* create a broadcast group ud AV */ av = alloc_ud_av(); @@ -178,6 +184,19 @@ static int ib_driver_init(struct pci_device *pci, udqp_t * ipoib_qph_p) tprintf("modify_av_params() success"); ib_data.bcast_av = av; +#if ! CREATE_OWN + rc = create_ipoib_qp(&ib_data.ipoib_qp, + &ib_data.ipoib_snd_cq, + &ib_data.ipoib_rcv_cq, qkey); + if (rc) { + eprintf(""); + return rc; + } + + tprintf("create_ipoib_qp() success"); + *ipoib_qph_p = ib_data.ipoib_qp; +#endif + do { rc = poll_eq(&ib_eqe, &num_eqe); if (rc) { diff --git a/src/drivers/net/mlx_ipoib/ib_driver.h b/src/drivers/net/mlx_ipoib/ib_driver.h index 5ee46534..6dca8d30 100644 --- a/src/drivers/net/mlx_ipoib/ib_driver.h +++ b/src/drivers/net/mlx_ipoib/ib_driver.h @@ -49,16 +49,16 @@ #define QPN_BASE 0x550000 enum { - MADS_QPN_SN, IPOIB_QPN_SN, + MADS_QPN_SN = 4, MAX_APP_QPS = 8 }; enum { - MADS_SND_CQN_SN = 4, - MADS_RCV_CQN_SN, IPOIB_SND_CQN_SN, IPOIB_RCV_CQN_SN, + MADS_SND_CQN_SN = 4, + MADS_RCV_CQN_SN, MAX_APP_CQS = 8 }; diff --git a/src/drivers/net/mlx_ipoib/ib_mt25218.c b/src/drivers/net/mlx_ipoib/ib_mt25218.c index 45d7f46f..ba1108a3 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt25218.c +++ b/src/drivers/net/mlx_ipoib/ib_mt25218.c @@ -336,6 +336,8 @@ static void prep_sw2hw_mpt_buf(void *buf, __u32 mkey) // INS_FLD(virt_to_bus(dev_buffers_p), buf, arbelprm_mpt_st, // start_address_l); // INS_FLD(memreg_size, buf, arbelprm_mpt_st, reg_wnd_len_l); + INS_FLD(0, buf, arbelprm_mpt_st, start_address_l); + INS_FLD(0, buf, arbelprm_mpt_st, start_address_h); INS_FLD(0xffffffffUL, buf, arbelprm_mpt_st, reg_wnd_len_l); INS_FLD(0xffffffffUL, buf, arbelprm_mpt_st, reg_wnd_len_h); } @@ -1179,6 +1181,8 @@ static int create_mads_qp(void **qp_pp, void **snd_cq_pp, void **rcv_cq_pp) __u8 nds; void *ptr; + DBG ( "*** Creating MADS queue pair ***\n" ); + qp = &dev_ib_data.mads_qp; /* set the pointer to the receive WQEs buffer */ @@ -1289,6 +1293,8 @@ static int create_mads_qp(void **qp_pp, void **snd_cq_pp, void **rcv_cq_pp) *rcv_cq_pp = &qp->rcv_cq; } + DBG ( "*** Created MADS queue pair ***\n" ); + return rc; } @@ -1302,6 +1308,8 @@ static int create_ipoib_qp(void **qp_pp, __u8 nds; void *ptr; + DBG ( "*** Creating IPoIB queue pair ***\n" ); + qp = &dev_ib_data.ipoib_qp; /* set the pointer to the receive WQEs buffer */ @@ -1407,6 +1415,8 @@ static int create_ipoib_qp(void **qp_pp, *rcv_cq_pp = &qp->rcv_cq; } + DBG ( "*** Created IPoIB queue pair ***\n" ); + return rc; } @@ -1427,6 +1437,8 @@ static int create_udqp(struct udqp_st *qp) qp->snd_cq.ci_db_ctx_pointer = dev_ib_data.uar_context_base + 8 * qp->snd_cq.ci_db_ctx_idx; + DBG ( "* 
Creating send CQ *\n" ); + /* create send CQ */ init_cq_buf(qp->snd_cq.cq_buf, qp->snd_cq.num_cqes); qp->snd_cq.cons_counter = 0; @@ -1443,6 +1455,8 @@ static int create_udqp(struct udqp_st *qp) goto exit; } + DBG ( "* Creating receive CQ *\n" ); + /* create receive CQ */ init_cq_buf(qp->rcv_cq.cq_buf, qp->rcv_cq.num_cqes); qp->rcv_cq.cons_counter = 0; @@ -1460,6 +1474,8 @@ static int create_udqp(struct udqp_st *qp) goto undo_snd_cq; } + DBG ( "* Creating QP *\n" ); + prep_rst2init_qpee_buf(inprm, qp->snd_cq.cqn, qp->rcv_cq.cqn, diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 8d6020db..32cb0f7b 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -22,6 +22,8 @@ Skeleton NIC driver for Etherboot /* to get the interface to the body of the program */ #include "nic.h" +#define CREATE_OWN 1 + #include "mt25218_imp.c" #include "arbel.h" @@ -35,17 +37,25 @@ static const struct ib_gid arbel_no_gid = { #define MLX_RX_MAX_FILL NUM_IPOIB_RCV_WQES struct mlx_nic { +#if ! CREATE_OWN /** Queue pair handle */ udqp_t ipoib_qph; - /** Broadcast Address Vector */ - ud_av_t bcast_av; /** Send completion queue */ cq_t snd_cqh; /** Receive completion queue */ cq_t rcv_cqh; +#endif + /** Broadcast Address Vector */ + ud_av_t bcast_av; /** RX fill level */ unsigned int rx_fill; + +#if CREATE_OWN + struct ib_completion_queue *own_send_cq; + struct ib_completion_queue *own_recv_cq; + struct ib_queue_pair *own_qp; +#endif }; @@ -54,6 +64,8 @@ static struct io_buffer *static_ipoib_rx_ring[NUM_IPOIB_RCV_WQES]; static struct arbel static_arbel; +#if ! CREATE_OWN + static struct arbel_completion_queue static_arbel_ipoib_send_cq = { .ci_doorbell_idx = IPOIB_SND_CQ_CI_DB_IDX, }; @@ -104,6 +116,8 @@ static struct ib_queue_pair static_ipoib_qp = { .dev_priv = &static_arbel_ipoib_qp, }; +#endif + static struct ib_device static_ibdev = { .dev_priv = &static_arbel, @@ -157,7 +171,13 @@ static int mlx_transmit_direct ( struct net_device *netdev, }; memcpy ( &av.gid, ( ( void * ) bav ) + 16, 16 ); - rc = arbel_post_send ( &static_ibdev, &static_ipoib_qp, &av, iobuf ); + rc = arbel_post_send ( &static_ibdev, +#if CREATE_OWN + mlx->own_qp, +#else + &static_ipoib_qp, +#endif + &av, iobuf ); return rc; } @@ -212,7 +232,11 @@ static void mlx_refill_rx ( struct net_device *netdev ) { break; DBG ( "Posting RX buffer %p:\n", iobuf ); if ( ( rc = arbel_post_recv ( &static_ibdev, +#if CREATE_OWN + mlx->own_qp, +#else &static_ipoib_qp, +#endif iobuf ) ) != 0 ) { free_iob ( iobuf ); break; @@ -244,9 +268,19 @@ static void mlx_poll ( struct net_device *netdev ) { } /* Poll completion queues */ - arbel_poll_cq ( &static_ibdev, &static_ipoib_send_cq, + arbel_poll_cq ( &static_ibdev, +#if CREATE_OWN + mlx->own_send_cq, +#else + &static_ipoib_send_cq, +#endif temp_complete_send, temp_complete_recv ); - arbel_poll_cq ( &static_ibdev, &static_ipoib_recv_cq, + arbel_poll_cq ( &static_ibdev, +#if CREATE_OWN + mlx->own_recv_cq, +#else + &static_ipoib_recv_cq, +#endif temp_complete_send, temp_complete_recv ); mlx_refill_rx ( netdev ); @@ -406,6 +440,15 @@ static int arbel_cmd ( struct arbel *arbel, unsigned long command, opcode_modifier, op_mod, go, 1 ); + DBG_HD ( &hcr, sizeof ( hcr ) ); + if ( in_len ) { + size_t dump_len = in_len; + if ( dump_len > 256 ) + dump_len = 256; + DBG ( "Input:\n" ); + DBG_HD ( in, dump_len ); + } + /* Issue command */ for ( i = 0 ; i < ( sizeof ( hcr ) / sizeof ( hcr.u.dwords[0] ) ) ; i++ ) { @@ -436,6 +479,14 @@ static int arbel_cmd ( 
struct arbel *arbel, unsigned long command, hcr.u.dwords[4] = readl ( arbel->config + ARBEL_HCR_REG ( 4 ) ); memcpy ( out, out_buffer, out_len ); + if ( out_len ) { + size_t dump_len = out_len; + if ( dump_len > 256 ) + dump_len = 256; + DBG ( "Output:\n" ); + DBG_HD ( out, dump_len ); + } + return 0; } @@ -698,7 +749,9 @@ static int arbel_create_recv_wq ( struct arbel_recv_work_queue *arbel_recv_wq, struct arbelprm_recv_wqe *wqe; struct arbelprm_recv_wqe *next_wqe; unsigned int wqe_idx_mask; + size_t nds; unsigned int i; + unsigned int j; /* Allocate work queue */ arbel_recv_wq->wqe_size = ( num_wqes * @@ -711,11 +764,19 @@ static int arbel_create_recv_wq ( struct arbel_recv_work_queue *arbel_recv_wq, /* Link work queue entries */ wqe_idx_mask = ( num_wqes - 1 ); + nds = ( ( offsetof ( typeof ( *wqe ), data ) + + sizeof ( wqe->data[0] ) ) >> 4 ); for ( i = 0 ; i < num_wqes ; i++ ) { wqe = &arbel_recv_wq->wqe[i].recv; next_wqe = &arbel_recv_wq->wqe[( i + 1 ) & wqe_idx_mask].recv; MLX_FILL_1 ( &wqe->next, 0, nda_31_6, ( virt_to_bus ( next_wqe ) >> 6 ) ); + MLX_FILL_1 ( &wqe->next, 1, nds, ( sizeof ( *wqe ) / 16 ) ); + for ( j = 0 ; ( ( ( void * ) &wqe->data[j] ) < + ( ( void * ) ( wqe + 1 ) ) ) ; j++ ) { + MLX_FILL_1 ( &wqe->data[j], 1, + l_key, ARBEL_INVALID_LKEY ); + } } return 0; @@ -787,10 +848,10 @@ static int arbel_create_qp ( struct ib_device *ibdev, qpc_eec_data.msg_max, 11 /* 2^11 = 2048 */, qpc_eec_data.log_rq_size, fls ( qp->recv.num_wqes - 1 ), qpc_eec_data.log_rq_stride, - ( fls ( sizeof ( arbel_qp->send.wqe[0] ) - 1 ) - 4 ), + ( fls ( sizeof ( arbel_qp->recv.wqe[0] ) - 1 ) - 4 ), qpc_eec_data.log_sq_size, fls ( qp->send.num_wqes - 1 ), qpc_eec_data.log_sq_stride, - ( fls ( sizeof ( arbel_qp->recv.wqe[0] ) - 1 ) - 4 ) ); + ( fls ( sizeof ( arbel_qp->send.wqe[0] ) - 1 ) - 4 ) ); MLX_FILL_1 ( &qpctx, 5, qpc_eec_data.usr_page, arbel->limits.reserved_uars ); MLX_FILL_1 ( &qpctx, 10, qpc_eec_data.primary_address_path.port_number, @@ -976,6 +1037,7 @@ static int arbel_post_send ( struct ib_device *ibdev, MLX_FILL_1 ( &wqe->ud, 8, destination_qp, av->dest_qp ); MLX_FILL_1 ( &wqe->ud, 9, q_key, av->qkey ); MLX_FILL_1 ( &wqe->data[0], 0, byte_count, iob_len ( iobuf ) ); + MLX_FILL_1 ( &wqe->data[0], 1, l_key, arbel->reserved_lkey ); MLX_FILL_1 ( &wqe->data[0], 3, local_address_l, virt_to_bus ( iobuf->data ) ); @@ -1249,6 +1311,7 @@ static int arbel_probe ( struct pci_device *pci, /* Initialise hardware */ if ( ( rc = ib_driver_init ( pci, &qph ) ) != 0 ) goto err_ipoib_init; +#if ! CREATE_OWN mlx->ipoib_qph = qph; mlx->bcast_av = ib_data.bcast_av; mlx->snd_cqh = ib_data.ipoib_snd_cq; @@ -1256,6 +1319,7 @@ static int arbel_probe ( struct pci_device *pci, mac = ( ( struct ib_mac * ) netdev->ll_addr ); mac->qpn = htonl ( ib_get_qpn ( mlx->ipoib_qph ) ); memcpy ( &mac->gid, ib_data.port_gid.raw, sizeof ( mac->gid ) ); +#endif /* Hack up IB structures */ arbel->config = memfree_pci_dev.cr_space; @@ -1265,6 +1329,7 @@ static int arbel_probe ( struct pci_device *pci, arbel->db_rec = dev_ib_data.uar_context_base; arbel->reserved_lkey = dev_ib_data.mkey; arbel->eqn = dev_ib_data.eq.eqn; +#if ! 
CREATE_OWN static_arbel_ipoib_qp.send.wqe = ( ( struct udqp_st * ) qph )->snd_wq; static_arbel_ipoib_qp.recv.wqe = @@ -1279,6 +1344,7 @@ static int arbel_probe ( struct pci_device *pci, &static_ipoib_send_cq.work_queues ); list_add ( &static_ipoib_qp.recv.list, &static_ipoib_recv_cq.work_queues ); +#endif static_ibdev.op = &arbel_ib_operations; /* Get device limits */ @@ -1293,12 +1359,40 @@ static int arbel_probe ( struct pci_device *pci, arbel->limits.reserved_qps = ( 1 << MLX_GET ( &dev_lim, log2_rsvd_qps ) ); +#if CREATE_OWN + struct ib_device *ibdev = &static_ibdev; + mlx->own_send_cq = ib_create_cq ( ibdev, 32 ); + if ( ! mlx->own_send_cq ) { + DBG ( "Could not create send CQ\n" ); + return -EIO; + } + mlx->own_recv_cq = ib_create_cq ( ibdev, 32 ); + if ( ! mlx->own_recv_cq ) { + DBG ( "Could not create send CQ\n" ); + return -EIO; + } + mlx->own_qp = ib_create_qp ( ibdev, NUM_IPOIB_SND_WQES, + mlx->own_send_cq, NUM_IPOIB_RCV_WQES, + mlx->own_recv_cq, ipoib_qkey ); + if ( ! mlx->own_qp ) { + DBG ( "Could not create QP\n" ); + return -EIO; + } + mlx->own_qp->owner_priv = netdev; + + mac = ( ( struct ib_mac * ) netdev->ll_addr ); + mac->qpn = htonl ( mlx->own_qp->qpn ); + memcpy ( &mac->gid, ib_data.port_gid.raw, sizeof ( mac->gid ) ); +#endif + +#if 0 DBG ( "MADS SND CQN = %#lx\n", dev_ib_data.mads_qp.snd_cq.cqn ); struct ib_completion_queue *test_cq; test_cq = ib_create_cq ( &static_ibdev, 32 ); if ( test_cq ) { DBG ( "Woot: create_cq() passed!\n" ); } +#endif /* Register network device */ if ( ( rc = register_netdev ( netdev ) ) != 0 ) From 96d0c75c0067fbb55cf1055dfb342d9851d118ff Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Sun, 16 Sep 2007 22:53:57 +0100 Subject: [PATCH 46/84] Now transmits packets on our own allocated IPoIB queue pair. :) --- src/drivers/net/mlx_ipoib/mt25218.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 32cb0f7b..6cef5927 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -1311,9 +1311,9 @@ static int arbel_probe ( struct pci_device *pci, /* Initialise hardware */ if ( ( rc = ib_driver_init ( pci, &qph ) ) != 0 ) goto err_ipoib_init; + mlx->bcast_av = ib_data.bcast_av; #if ! CREATE_OWN mlx->ipoib_qph = qph; - mlx->bcast_av = ib_data.bcast_av; mlx->snd_cqh = ib_data.ipoib_snd_cq; mlx->rcv_cqh = ib_data.ipoib_rcv_cq; mac = ( ( struct ib_mac * ) netdev->ll_addr ); From 3c6a6bdc5d78ff8e1ee2ff190183bbea33f0579f Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 00:24:44 +0100 Subject: [PATCH 47/84] Multicast join now works. 
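
The join is driven through the new mcast_attach()/mcast_detach() device
operations.  A minimal caller-side sketch, assuming a UD queue pair
created as in the previous commits and taking the broadcast GID from the
legacy ib_data structure:

        struct ib_gid *bcast_gid = ( struct ib_gid * ) &ib_data.bcast_gid;
        int rc;

        /* Join the broadcast group: the Arbel backend hashes the GID
         * via MGID_HASH and writes the QPN into the matching MGM
         * hash table entry (only one QP per group for now). */
        if ( ( rc = ib_mcast_attach ( ibdev, qp, bcast_gid ) ) != 0 )
                return rc;

        /* ... broadcast/multicast traffic now reaches qp ... */

        /* Leave the group again; this clears the MGM entry */
        ib_mcast_detach ( ibdev, qp, bcast_gid );
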
--- src/drivers/net/mlx_ipoib/arbel.h | 25 ++++- src/drivers/net/mlx_ipoib/mt25218.c | 136 ++++++++++++++++++++++++++++ src/include/gpxe/infiniband.h | 47 ++++++++++ 3 files changed, 204 insertions(+), 4 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index cd6a48eb..c4b536a5 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -33,6 +33,9 @@ #define ARBEL_HCR_INIT2RTR_QPEE 0x001a #define ARBEL_HCR_RTR2RTS_QPEE 0x001b #define ARBEL_HCR_2RST_QPEE 0x0021 +#define ARBEL_HCR_READ_MGM 0x0025 +#define ARBEL_HCR_WRITE_MGM 0x0026 +#define ARBEL_HCR_MGID_HASH 0x0027 /* Service types */ #define ARBEL_ST_UD 0x03 @@ -42,6 +45,17 @@ #define ARBEL_INVALID_LKEY 0x00000100UL +/* + * Datatypes that seem to be missing from the autogenerated documentation + * + */ +struct arbelprm_mgm_hash_st { + pseudo_bit_t reserved0[0x00020]; +/* -------------- */ + pseudo_bit_t hash[0x00010]; + pseudo_bit_t reserved1[0x00010]; +}; + /* * Wrapper structures for hardware datatypes * @@ -53,6 +67,8 @@ struct MLX_DECLARE_STRUCT ( arbelprm_completion_with_error ); struct MLX_DECLARE_STRUCT ( arbelprm_cq_arm_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_hca_command_register ); +struct MLX_DECLARE_STRUCT ( arbelprm_mgm_entry ); +struct MLX_DECLARE_STRUCT ( arbelprm_mgm_hash ); struct MLX_DECLARE_STRUCT ( arbelprm_qp_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_qp_ee_state_transitions ); struct MLX_DECLARE_STRUCT ( arbelprm_query_dev_lim ); @@ -266,7 +282,8 @@ struct arbel { #define ARBEL_HCR_OUT_LEN( _command ) ( ( (_command) >> 21 ) & 0x7fc ) /** Build HCR command from component parts */ -#define ARBEL_HCR_CMD( _opcode, _in_mbox, _in_len, _out_mbox, _out_len ) \ +#define ARBEL_HCR_INOUT_CMD( _opcode, _in_mbox, _in_len, \ + _out_mbox, _out_len ) \ ( (_opcode) | \ ( (_in_mbox) ? 
ARBEL_HCR_IN_MBOX : 0 ) | \ ( ( (_in_len) / 4 ) << 14 ) | \ @@ -274,13 +291,13 @@ struct arbel { ( ( (_out_len) / 4 ) << 23 ) ) #define ARBEL_HCR_IN_CMD( _opcode, _in_mbox, _in_len ) \ - ARBEL_HCR_CMD ( _opcode, _in_mbox, _in_len, 0, 0 ) + ARBEL_HCR_INOUT_CMD ( _opcode, _in_mbox, _in_len, 0, 0 ) #define ARBEL_HCR_OUT_CMD( _opcode, _out_mbox, _out_len ) \ - ARBEL_HCR_CMD ( _opcode, 0, 0, _out_mbox, _out_len ) + ARBEL_HCR_INOUT_CMD ( _opcode, 0, 0, _out_mbox, _out_len ) #define ARBEL_HCR_VOID_CMD( _opcode ) \ - ARBEL_HCR_CMD ( _opcode, 0, 0, 0, 0 ) + ARBEL_HCR_INOUT_CMD ( _opcode, 0, 0, 0, 0 ) /* * Doorbell record allocation diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 6cef5927..13b7d78b 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -549,6 +549,34 @@ arbel_cmd_2rst_qpee ( struct arbel *arbel, unsigned long qpn ) { 0x03, NULL, qpn, NULL ); } +static inline int +arbel_cmd_read_mgm ( struct arbel *arbel, unsigned int index, + struct arbelprm_mgm_entry *mgm ) { + return arbel_cmd ( arbel, + ARBEL_HCR_OUT_CMD ( ARBEL_HCR_READ_MGM, + 1, sizeof ( *mgm ) ), + 0, NULL, index, mgm ); +} + +static inline int +arbel_cmd_write_mgm ( struct arbel *arbel, unsigned int index, + const struct arbelprm_mgm_entry *mgm ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_WRITE_MGM, + 1, sizeof ( *mgm ) ), + 0, mgm, index, NULL ); +} + +static inline int +arbel_cmd_mgid_hash ( struct arbel *arbel, const struct ib_gid *gid, + struct arbelprm_mgm_hash *hash ) { + return arbel_cmd ( arbel, + ARBEL_HCR_INOUT_CMD ( ARBEL_HCR_MGID_HASH, + 1, sizeof ( *gid ), + 0, sizeof ( *hash ) ), + 0, gid, 0, hash ); +} + /*************************************************************************** * * Completion queue operations @@ -1253,6 +1281,104 @@ static void arbel_poll_cq ( struct ib_device *ibdev, } } +/*************************************************************************** + * + * Multicast group operations + * + *************************************************************************** + */ + +/** + * Attach to multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + * @ret rc Return status code + */ +static int arbel_mcast_attach ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct ib_gid *gid ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbelprm_mgm_hash hash; + struct arbelprm_mgm_entry mgm; + unsigned int index; + int rc; + + /* Generate hash table index */ + if ( ( rc = arbel_cmd_mgid_hash ( arbel, gid, &hash ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not hash GID: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + index = MLX_GET ( &hash, hash ); + + /* Check for existing hash table entry */ + if ( ( rc = arbel_cmd_read_mgm ( arbel, index, &mgm ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not read MGM %#x: %s\n", + arbel, index, strerror ( rc ) ); + return rc; + } + if ( MLX_GET ( &mgm, mgmqp_0.qi ) != 0 ) { + /* FIXME: this implementation allows only a single QP + * per multicast group, and doesn't handle hash + * collisions. Sufficient for IPoIB but may need to + * be extended in future. 
+ */ + DBGC ( arbel, "Arbel %p MGID index %#x already in use\n", + arbel, index ); + return -EBUSY; + } + + /* Update hash table entry */ + MLX_FILL_2 ( &mgm, 8, + mgmqp_0.qpn_i, qp->qpn, + mgmqp_0.qi, 1 ); + memcpy ( &mgm.u.dwords[4], gid, sizeof ( *gid ) ); + if ( ( rc = arbel_cmd_write_mgm ( arbel, index, &mgm ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not write MGM %#x: %s\n", + arbel, index, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Detach from multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + */ +static void arbel_mcast_detach ( struct ib_device *ibdev, + struct ib_queue_pair *qp __unused, + struct ib_gid *gid ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbelprm_mgm_hash hash; + struct arbelprm_mgm_entry mgm; + unsigned int index; + int rc; + + /* Generate hash table index */ + if ( ( rc = arbel_cmd_mgid_hash ( arbel, gid, &hash ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not hash GID: %s\n", + arbel, strerror ( rc ) ); + return; + } + index = MLX_GET ( &hash, hash ); + + /* Clear hash table entry */ + memset ( &mgm, 0, sizeof ( mgm ) ); + if ( ( rc = arbel_cmd_write_mgm ( arbel, index, &mgm ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not write MGM %#x: %s\n", + arbel, index, strerror ( rc ) ); + return; + } +} + + + /** Arbel Infiniband operations */ static struct ib_device_operations arbel_ib_operations = { .create_cq = arbel_create_cq, @@ -1262,6 +1388,8 @@ static struct ib_device_operations arbel_ib_operations = { .post_send = arbel_post_send, .post_recv = arbel_post_recv, .poll_cq = arbel_poll_cq, + .mcast_attach = arbel_mcast_attach, + .mcast_detach = arbel_mcast_detach, }; /** @@ -1379,6 +1507,14 @@ static int arbel_probe ( struct pci_device *pci, return -EIO; } mlx->own_qp->owner_priv = netdev; + struct ib_gid *bcast_gid = ( struct ib_gid * ) &ib_data.bcast_gid; + if ( ( rc = ib_mcast_attach ( ibdev, mlx->own_qp, + bcast_gid ) ) != 0 ) { + DBG ( "Could not attach to broadcast GID: %s\n", + strerror ( rc ) ); + return rc; + } + mac = ( ( struct ib_mac * ) netdev->ll_addr ); mac->qpn = htonl ( mlx->own_qp->qpn ); diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 632a214e..6a38a1b8 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -252,6 +252,27 @@ struct ib_device_operations { struct ib_completion_queue *cq, ib_completer_t complete_send, ib_completer_t complete_recv ); + /** + * Attach to multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + * @ret rc Return status code + */ + int ( * mcast_attach ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct ib_gid *gid ); + /** + * Detach from multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + */ + void ( * mcast_detach ) ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct ib_gid *gid ); }; /** An Infiniband device */ @@ -275,6 +296,32 @@ extern void ib_destroy_qp ( struct ib_device *ibdev, extern struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, unsigned long qpn, int is_send ); +/** + * Attach to multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + * @ret rc Return status code + */ +static inline __attribute__ (( always_inline )) int +ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_gid *gid ) { + return ibdev->op->mcast_attach ( ibdev, qp, gid ); +} + +/** + * Detach from 
multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + */ +static inline __attribute__ (( always_inline )) void +ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_gid *gid ) { + ibdev->op->mcast_detach ( ibdev, qp, gid ); +} extern struct ll_protocol infiniband_protocol; From 67836430e6a434cf8e3d6637bcd27b250d87003f Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 02:54:15 +0100 Subject: [PATCH 48/84] Read port GID directly using MAD IFC. --- src/drivers/net/mlx_ipoib/arbel.h | 7 ++ src/drivers/net/mlx_ipoib/mt25218.c | 135 ++++++++++++++++++++++++---- src/include/gpxe/infiniband.h | 115 ++++++++++++++++++++++++ 3 files changed, 242 insertions(+), 15 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index c4b536a5..28893f51 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -33,6 +33,7 @@ #define ARBEL_HCR_INIT2RTR_QPEE 0x001a #define ARBEL_HCR_RTR2RTS_QPEE 0x001b #define ARBEL_HCR_2RST_QPEE 0x0021 +#define ARBEL_HCR_MAD_IFC 0x0024 #define ARBEL_HCR_READ_MGM 0x0025 #define ARBEL_HCR_WRITE_MGM 0x0026 #define ARBEL_HCR_MGID_HASH 0x0027 @@ -67,6 +68,7 @@ struct MLX_DECLARE_STRUCT ( arbelprm_completion_with_error ); struct MLX_DECLARE_STRUCT ( arbelprm_cq_arm_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_hca_command_register ); +struct MLX_DECLARE_STRUCT ( arbelprm_mad_ifc ); struct MLX_DECLARE_STRUCT ( arbelprm_mgm_entry ); struct MLX_DECLARE_STRUCT ( arbelprm_mgm_hash ); struct MLX_DECLARE_STRUCT ( arbelprm_qp_db_record ); @@ -126,6 +128,11 @@ union arbelprm_doorbell_register { uint32_t dword[2]; } __attribute__ (( packed )); +union arbelprm_mad { + struct arbelprm_mad_ifc ifc; + union ib_mad mad; +} __attribute__ (( packed )); + /* * gPXE-specific definitions * diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 13b7d78b..aed6d208 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -549,6 +549,15 @@ arbel_cmd_2rst_qpee ( struct arbel *arbel, unsigned long qpn ) { 0x03, NULL, qpn, NULL ); } +static inline int +arbel_cmd_mad_ifc ( struct arbel *arbel, union arbelprm_mad *mad ) { + return arbel_cmd ( arbel, + ARBEL_HCR_INOUT_CMD ( ARBEL_HCR_MAD_IFC, + 1, sizeof ( *mad ), + 1, sizeof ( *mad ) ), + 0x03, mad, PXE_IB_PORT, mad ); +} + static inline int arbel_cmd_read_mgm ( struct arbel *arbel, unsigned int index, struct arbelprm_mgm_entry *mgm ) { @@ -1233,6 +1242,15 @@ static int arbel_complete ( struct ib_device *ibdev, return rc; } +/** + * Drain event queue + * + * @v arbel Arbel device + */ +static void arbel_drain_eq ( struct arbel *arbel ) { +#warning "drain the event queue" +} + /** * Poll completion queue * @@ -1252,6 +1270,9 @@ static void arbel_poll_cq ( struct ib_device *ibdev, unsigned int cqe_idx_mask; int rc; + /* Drain the event queue */ + arbel_drain_eq ( arbel ); + while ( 1 ) { /* Look for completion entry */ cqe_idx_mask = ( cq->num_cqes - 1 ); @@ -1377,8 +1398,6 @@ static void arbel_mcast_detach ( struct ib_device *ibdev, } } - - /** Arbel Infiniband operations */ static struct ib_device_operations arbel_ib_operations = { .create_cq = arbel_create_cq, @@ -1392,20 +1411,83 @@ static struct ib_device_operations arbel_ib_operations = { .mcast_detach = arbel_mcast_detach, }; -/** - * Remove PCI device - * - * @v pci PCI device - */ -static void arbel_remove ( 
struct pci_device *pci ) { - struct net_device *netdev = pci_get_drvdata ( pci ); - unregister_netdev ( netdev ); - ib_driver_close ( 0 ); - netdev_nullify ( netdev ); - netdev_put ( netdev ); +static int arbel_mad_ifc ( struct arbel *arbel, + union arbelprm_mad *mad ) { + struct ib_mad_hdr *hdr = &mad->mad.mad_hdr; + int rc; + + hdr->base_version = IB_MGMT_BASE_VERSION; + if ( ( rc = arbel_cmd_mad_ifc ( arbel, mad ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not issue MAD IFC: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + if ( hdr->status != 0 ) { + DBGC ( arbel, "Arbel %p MAD IFC status %04x\n", + arbel, ntohs ( hdr->status ) ); + return -EIO; + } + return 0; } +static int arbel_get_port_info ( struct arbel *arbel, + struct ib_mad_port_info *port_info ) { + union arbelprm_mad mad; + struct ib_mad_hdr *hdr = &mad.mad.mad_hdr; + int rc; + + memset ( &mad, 0, sizeof ( mad ) ); + hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + hdr->class_version = 1; + hdr->method = IB_MGMT_METHOD_GET; + hdr->attr_id = htons ( IB_SMP_ATTR_PORT_INFO ); + hdr->attr_mod = htonl ( PXE_IB_PORT ); + if ( ( rc = arbel_mad_ifc ( arbel, &mad ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not get port info: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + memcpy ( port_info, &mad.mad.port_info, sizeof ( *port_info ) ); + return 0; +} + +static int arbel_get_guid_info ( struct arbel *arbel, + struct ib_mad_guid_info *guid_info ) { + union arbelprm_mad mad; + struct ib_mad_hdr *hdr = &mad.mad.mad_hdr; + int rc; + + memset ( &mad, 0, sizeof ( mad ) ); + hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + hdr->class_version = 1; + hdr->method = IB_MGMT_METHOD_GET; + hdr->attr_id = htons ( IB_SMP_ATTR_GUID_INFO ); + if ( ( rc = arbel_mad_ifc ( arbel, &mad ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not get GUID info: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + memcpy ( guid_info, &mad.mad.guid_info, sizeof ( *guid_info ) ); + return 0; +} + +static int arbel_get_port_gid ( struct arbel *arbel, struct ib_gid *gid ) { + struct ib_mad_port_info port_info; + struct ib_mad_guid_info guid_info; + int rc; + + if ( ( rc = arbel_get_port_info ( arbel, &port_info ) ) != 0 ) + return rc; + if ( ( rc = arbel_get_guid_info ( arbel, &guid_info ) ) != 0 ) + return rc; + memcpy ( &gid->bytes[0], port_info.gid_prefix, 8 ); + memcpy ( &gid->bytes[8], guid_info.gid_local, 8 ); + return 0; +} + + + /** * Probe PCI device * @@ -1514,11 +1596,20 @@ static int arbel_probe ( struct pci_device *pci, strerror ( rc ) ); return rc; } - + + if ( ( rc = arbel_get_port_gid ( arbel, &ibdev->port_gid ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not determine port GID: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + + DBG ( "Port GID:\n" ); + DBG_HD ( &ibdev->port_gid, sizeof ( ibdev->port_gid ) ); + mac = ( ( struct ib_mac * ) netdev->ll_addr ); mac->qpn = htonl ( mlx->own_qp->qpn ); - memcpy ( &mac->gid, ib_data.port_gid.raw, sizeof ( mac->gid ) ); + memcpy ( &mac->gid, &ibdev->port_gid, sizeof ( mac->gid ) ); #endif #if 0 @@ -1545,6 +1636,20 @@ static int arbel_probe ( struct pci_device *pci, return rc; } +/** + * Remove PCI device + * + * @v pci PCI device + */ +static void arbel_remove ( struct pci_device *pci ) { + struct net_device *netdev = pci_get_drvdata ( pci ); + + unregister_netdev ( netdev ); + ib_driver_close ( 0 ); + netdev_nullify ( netdev ); + netdev_put ( netdev ); +} + static struct pci_device_id arbel_nics[] = { PCI_ROM ( 0x15b3, 0x6282, "MT25218", "MT25218 HCA driver" ), PCI_ROM ( 0x15b3, 0x6274, 
"MT25204", "MT25204 HCA driver" ), diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 6a38a1b8..3f09808c 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -277,6 +277,8 @@ struct ib_device_operations { /** An Infiniband device */ struct ib_device { + /** Port GID */ + struct ib_gid port_gid; /** Infiniband operations */ struct ib_device_operations *op; /** Device private data */ @@ -323,6 +325,119 @@ ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp, ibdev->op->mcast_detach ( ibdev, qp, gid ); } +/***************************************************************************** + * + * Management datagrams + * + * Portions Copyright (c) 2004 Mellanox Technologies Ltd. All rights + * reserved. + * + */ + +/* Management base version */ +#define IB_MGMT_BASE_VERSION 1 + +/* Management classes */ +#define IB_MGMT_CLASS_SUBN_LID_ROUTED 0x01 +#define IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE 0x81 +#define IB_MGMT_CLASS_SUBN_ADM 0x03 +#define IB_MGMT_CLASS_PERF_MGMT 0x04 +#define IB_MGMT_CLASS_BM 0x05 +#define IB_MGMT_CLASS_DEVICE_MGMT 0x06 +#define IB_MGMT_CLASS_CM 0x07 +#define IB_MGMT_CLASS_SNMP 0x08 +#define IB_MGMT_CLASS_VENDOR_RANGE2_START 0x30 +#define IB_MGMT_CLASS_VENDOR_RANGE2_END 0x4F + +/* Management methods */ +#define IB_MGMT_METHOD_GET 0x01 +#define IB_MGMT_METHOD_SET 0x02 +#define IB_MGMT_METHOD_GET_RESP 0x81 +#define IB_MGMT_METHOD_SEND 0x03 +#define IB_MGMT_METHOD_TRAP 0x05 +#define IB_MGMT_METHOD_REPORT 0x06 +#define IB_MGMT_METHOD_REPORT_RESP 0x86 +#define IB_MGMT_METHOD_TRAP_REPRESS 0x07 +#define IB_MGMT_METHOD_DELETE 0x15 +#define IB_MGMT_METHOD_RESP 0x80 + +/* Subnet management attributes */ +#define IB_SMP_ATTR_NOTICE 0x0002 +#define IB_SMP_ATTR_NODE_DESC 0x0010 +#define IB_SMP_ATTR_NODE_INFO 0x0011 +#define IB_SMP_ATTR_SWITCH_INFO 0x0012 +#define IB_SMP_ATTR_GUID_INFO 0x0014 +#define IB_SMP_ATTR_PORT_INFO 0x0015 +#define IB_SMP_ATTR_PKEY_TABLE 0x0016 +#define IB_SMP_ATTR_SL_TO_VL_TABLE 0x0017 +#define IB_SMP_ATTR_VL_ARB_TABLE 0x0018 +#define IB_SMP_ATTR_LINEAR_FORWARD_TABLE 0x0019 +#define IB_SMP_ATTR_RANDOM_FORWARD_TABLE 0x001A +#define IB_SMP_ATTR_MCAST_FORWARD_TABLE 0x001B +#define IB_SMP_ATTR_SM_INFO 0x0020 +#define IB_SMP_ATTR_VENDOR_DIAG 0x0030 +#define IB_SMP_ATTR_LED_INFO 0x0031 +#define IB_SMP_ATTR_VENDOR_MASK 0xFF00 + +struct ib_mad_hdr { + uint8_t base_version; + uint8_t mgmt_class; + uint8_t class_version; + uint8_t method; + uint16_t status; + uint16_t class_specific; + uint64_t tid; + uint16_t attr_id; + uint16_t resv; + uint32_t attr_mod; +} __attribute__ (( packed )); + +struct ib_mad_data { + struct ib_mad_hdr mad_hdr; + uint8_t data[232]; +} __attribute__ (( packed )); + +struct ib_mad_guid_info { + struct ib_mad_hdr mad_hdr; + uint32_t mkey[2]; + uint32_t reserved[8]; + uint8_t gid_local[8]; +} __attribute__ (( packed )); + +struct ib_mad_port_info { + struct ib_mad_hdr mad_hdr; + uint32_t mkey[2]; + uint32_t reserved[8]; + uint32_t mkey2[2]; + uint8_t gid_prefix[8]; + uint16_t lid; + uint16_t mastersm_lid; + uint32_t cap_mask; + uint16_t diag_code; + uint16_t mkey_lease_period; + uint8_t local_port_num; + uint8_t link_width_enabled; + uint8_t link_width_supported; + uint8_t link_width_active; + uint8_t port_state__link_speed_supported; + uint8_t link_down_def_state__port_phys_state; + uint8_t lmc__r1__mkey_prot_bits; + uint8_t link_speed_enabled__link_speed_active; +} __attribute__ (( packed )); + +union ib_mad { + struct ib_mad_hdr mad_hdr; + struct ib_mad_data data; + struct 
ib_mad_guid_info guid_info; + struct ib_mad_port_info port_info; +} __attribute__ (( packed )); + + + + + + + extern struct ll_protocol infiniband_protocol; From 4e78a53cf26b85736123eee29d23d637b4a3883f Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 05:04:58 +0100 Subject: [PATCH 49/84] IPoIB code separated out to ipoib.c. --- src/drivers/net/ipoib.c | 411 +++++++++++++++++++++++++ src/drivers/net/mlx_ipoib/ib_driver.c | 5 +- src/drivers/net/mlx_ipoib/ib_driver.h | 2 +- src/drivers/net/mlx_ipoib/ib_mt25218.c | 2 +- src/drivers/net/mlx_ipoib/ipoib.c | 2 +- src/drivers/net/mlx_ipoib/mt25218.c | 143 ++++++++- src/include/gpxe/errfile.h | 2 + src/include/gpxe/infiniband.h | 155 +++++++--- src/include/gpxe/ipoib.h | 78 +++++ src/net/infiniband.c | 32 +- 10 files changed, 786 insertions(+), 46 deletions(-) create mode 100644 src/drivers/net/ipoib.c create mode 100644 src/include/gpxe/ipoib.h diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c new file mode 100644 index 00000000..9eed6b39 --- /dev/null +++ b/src/drivers/net/ipoib.c @@ -0,0 +1,411 @@ +/* + * Copyright (C) 2007 Michael Brown . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/** @file + * + * IP over Infiniband + */ + + + + + +extern unsigned long hack_ipoib_qkey; +extern struct ib_address_vector hack_ipoib_bcast_av; + + + +/** IPoIB MTU */ +#define IPOIB_MTU 2048 + +/** Number of IPoIB send work queue entries */ +#define IPOIB_NUM_SEND_WQES 8 + +/** Number of IPoIB receive work queue entries */ +#define IPOIB_NUM_RECV_WQES 8 + +/** Number of IPoIB completion entries */ +#define IPOIB_NUM_CQES 8 + +struct ipoib_device { + struct ib_device *ibdev; + struct ib_completion_queue *cq; + struct ib_queue_pair *qp; + unsigned int rx_fill; +}; + +/**************************************************************************** + * + * IPoIB link layer + * + **************************************************************************** + */ + +/** Broadcast IPoIB address */ +static struct ipoib_mac ipoib_broadcast = { + .gid = { { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } }, +}; + +/** + * Transmit IPoIB packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * @v net_protocol Network-layer protocol + * @v ll_dest Link-layer destination address + * + * Prepends the IPoIB link-layer header and transmits the packet. 
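 * Note that only the four-byte "real" header (protocol and reserved
 * field) ever reaches the wire; the 20-byte destination address is
 * carried in the pseudo-header, which ipoib_transmit() strips again
 * before posting the send work queue entry.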
+ */ +static int ipoib_tx ( struct io_buffer *iobuf, struct net_device *netdev, + struct net_protocol *net_protocol, + const void *ll_dest ) { + struct ipoib_hdr *ipoib_hdr = + iob_push ( iobuf, sizeof ( *ipoib_hdr ) ); + + /* Build IPoIB header */ + memcpy ( &ipoib_hdr->pseudo.peer, ll_dest, + sizeof ( ipoib_hdr->pseudo.peer ) ); + ipoib_hdr->real.proto = net_protocol->net_proto; + ipoib_hdr->real.reserved = 0; + + /* Hand off to network device */ + return netdev_tx ( netdev, iobuf ); +} + +/** + * Process received IPoIB packet + * + * @v iobuf I/O buffer + * @v netdev Network device + * + * Strips off the IPoIB link-layer header and passes up to the + * network-layer protocol. + */ +static int ipoib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) { + struct ipoib_hdr *ipoib_hdr = iobuf->data; + + /* Sanity check */ + if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) { + DBG ( "IPoIB packet too short (%d bytes)\n", + iob_len ( iobuf ) ); + free_iob ( iobuf ); + return -EINVAL; + } + + /* Strip off IPoIB header */ + iob_pull ( iobuf, sizeof ( *ipoib_hdr ) ); + + /* Hand off to network-layer protocol */ + return net_rx ( iobuf, netdev, ipoib_hdr->real.proto, + &ipoib_hdr->pseudo.peer ); +} + +/** + * Transcribe IPoIB address + * + * @v ll_addr Link-layer address + * @ret string Link-layer address in human-readable format + */ +const char * ipoib_ntoa ( const void *ll_addr ) { + static char buf[61]; + const uint8_t *ipoib_addr = ll_addr; + unsigned int i; + char *p = buf; + + for ( i = 0 ; i < IPOIB_ALEN ; i++ ) { + p += sprintf ( p, ":%02x", ipoib_addr[i] ); + } + return ( buf + 1 ); +} + +/** IPoIB protocol */ +struct ll_protocol ipoib_protocol __ll_protocol = { + .name = "IPoIB", + .ll_proto = htons ( ARPHRD_INFINIBAND ), + .ll_addr_len = IPOIB_ALEN, + .ll_header_len = IPOIB_HLEN, + .ll_broadcast = ( uint8_t * ) &ipoib_broadcast, + .tx = ipoib_tx, + .rx = ipoib_rx, + .ntoa = ipoib_ntoa, +}; + +/**************************************************************************** + * + * IPoIB network device + * + **************************************************************************** + */ + +/** + * Transmit packet via IPoIB network device + * + * @v netdev Network device + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static int ipoib_transmit ( struct net_device *netdev, + struct io_buffer *iobuf ) { + struct ipoib_device *ipoib = netdev->priv; + struct ib_device *ibdev = ipoib->ibdev; + struct ipoib_pseudo_hdr *ipoib_pshdr = iobuf->data; + + if ( iob_len ( iobuf ) < sizeof ( *ipoib_pshdr ) ) { + DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib ); + return -EINVAL; + } + + iob_pull ( iobuf, ( sizeof ( *ipoib_pshdr ) ) ); + return ib_post_send ( ibdev, ipoib->qp, + &hack_ipoib_bcast_av, iobuf ); +} + +/** + * Handle IPoIB send completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v completion Completion + * @v iobuf I/O buffer + */ +static void ipoib_complete_send ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->owner_priv; + + netdev_tx_complete_err ( netdev, iobuf, + ( completion->syndrome ? 
-EIO : 0 ) ); +} + +/** + * Handle IPoIB receive completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v completion Completion + * @v iobuf I/O buffer + */ +static void ipoib_complete_recv ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->owner_priv; + struct ipoib_device *ipoib = netdev->priv; + struct ib_global_route_header *grh = iobuf->data; + struct ipoib_pseudo_hdr *ipoib_pshdr; + + if ( completion->syndrome ) { + netdev_rx_err ( netdev, iobuf, -EIO ); + } else { + iob_put ( iobuf, completion->len ); + iob_pull ( iobuf, ( sizeof ( *grh ) - + sizeof ( *ipoib_pshdr ) ) ); + /* FIXME: fill in a MAC address for the sake of AoE! */ + netdev_rx ( netdev, iobuf ); + } + + ipoib->rx_fill--; +} + +/** + * Refill IPoIB receive ring + * + * @v ipoib IPoIB device + */ +static void ipoib_refill_recv ( struct ipoib_device *ipoib ) { + struct ib_device *ibdev = ipoib->ibdev; + struct io_buffer *iobuf; + int rc; + + while ( ipoib->rx_fill < IPOIB_NUM_RECV_WQES ) { + iobuf = alloc_iob ( IPOIB_MTU ); + if ( ! iobuf ) + break; + if ( ( rc = ib_post_recv ( ibdev, ipoib->qp, + iobuf ) ) != 0 ) { + free_iob ( iobuf ); + break; + } + ipoib->rx_fill++; + } +} + +/** + * Poll IPoIB network device + * + * @v netdev Network device + */ +static void ipoib_poll ( struct net_device *netdev ) { + struct ipoib_device *ipoib = netdev->priv; + struct ib_device *ibdev = ipoib->ibdev; + + ib_poll_cq ( ibdev, ipoib->cq, ipoib_complete_send, + ipoib_complete_recv ); + ipoib_refill_recv ( ipoib ); +} + +/** + * Enable/disable interrupts on IPoIB network device + * + * @v netdev Network device + * @v enable Interrupts should be enabled + */ +static void ipoib_irq ( struct net_device *netdev __unused, + int enable __unused ) { + /* No implementation */ +} + +/** + * Open IPoIB network device + * + * @v netdev Network device + * @ret rc Return status code + */ +static int ipoib_open ( struct net_device *netdev ) { + struct ipoib_device *ipoib = netdev->priv; + struct ib_device *ibdev = ipoib->ibdev; + int rc; + + /* Attach to broadcast multicast GID */ + if ( ( rc = ib_mcast_attach ( ibdev, ipoib->qp, + &ibdev->broadcast_gid ) ) != 0 ) { + DBG ( "Could not attach to broadcast GID: %s\n", + strerror ( rc ) ); + return rc; + } + + /* Fill receive ring */ + ipoib_refill_recv ( ipoib ); + + return 0; +} + +/** + * Close IPoIB network device + * + * @v netdev Network device + */ +static void ipoib_close ( struct net_device *netdev ) { + struct ipoib_device *ipoib = netdev->priv; + struct ib_device *ibdev = ipoib->ibdev; + + /* Detach from broadcast multicast GID */ + ib_mcast_detach ( ibdev, ipoib->qp, &ipoib_broadcast.gid ); + + /* FIXME: should probably flush the receive ring */ +} + +/** IPoIB network device operations */ +static struct net_device_operations ipoib_operations = { + .open = ipoib_open, + .close = ipoib_close, + .transmit = ipoib_transmit, + .poll = ipoib_poll, + .irq = ipoib_irq, +}; + +/** + * Probe IPoIB device + * + * @v ibdev Infiniband device + * @ret rc Return status code + */ +int ipoib_probe ( struct ib_device *ibdev ) { + struct net_device *netdev; + struct ipoib_device *ipoib; + struct ipoib_mac *mac; + int rc; + + /* Allocate network device */ + netdev = alloc_ipoibdev ( sizeof ( *ipoib ) ); + if ( ! 
netdev ) + return -ENOMEM; + netdev_init ( netdev, &ipoib_operations ); + ipoib = netdev->priv; + ib_set_ownerdata ( ibdev, netdev ); + netdev->dev = ibdev->dev; + memset ( ipoib, 0, sizeof ( *ipoib ) ); + ipoib->ibdev = ibdev; + + /* Allocate completion queue */ + ipoib->cq = ib_create_cq ( ibdev, IPOIB_NUM_CQES ); + if ( ! ipoib->cq ) { + DBGC ( ipoib, "IPoIB %p could not allocate completion queue\n", + ipoib ); + rc = -ENOMEM; + goto err_create_cq; + } + + /* Allocate queue pair */ + ipoib->qp = ib_create_qp ( ibdev, IPOIB_NUM_SEND_WQES, + ipoib->cq, IPOIB_NUM_RECV_WQES, + ipoib->cq, hack_ipoib_qkey ); + if ( ! ipoib->qp ) { + DBGC ( ipoib, "IPoIB %p could not allocate queue pair\n", + ipoib ); + rc = -ENOMEM; + goto err_create_qp; + } + ipoib->qp->owner_priv = netdev; + + /* Construct MAC address */ + mac = ( ( struct ipoib_mac * ) netdev->ll_addr ); + mac->qpn = htonl ( ipoib->qp->qpn ); + memcpy ( &mac->gid, &ibdev->port_gid, sizeof ( mac->gid ) ); + + /* Register network device */ + if ( ( rc = register_netdev ( netdev ) ) != 0 ) + goto err_register_netdev; + + return 0; + + err_register_netdev: + ib_destroy_qp ( ibdev, ipoib->qp ); + err_create_qp: + ib_destroy_cq ( ibdev, ipoib->cq ); + err_create_cq: + netdev_nullify ( netdev ); + netdev_put ( netdev ); + return rc; +} + +/** + * Remove IPoIB device + * + * @v ibdev Infiniband device + */ +void ipoib_remove ( struct ib_device *ibdev ) { + struct net_device *netdev = ib_get_ownerdata ( ibdev ); + + unregister_netdev ( netdev ); + netdev_nullify ( netdev ); + netdev_put ( netdev ); +} diff --git a/src/drivers/net/mlx_ipoib/ib_driver.c b/src/drivers/net/mlx_ipoib/ib_driver.c index 590fb94d..34d4cbaa 100644 --- a/src/drivers/net/mlx_ipoib/ib_driver.c +++ b/src/drivers/net/mlx_ipoib/ib_driver.c @@ -63,6 +63,7 @@ static int wait_logic_link_up(__u8 port) } unsigned long ipoib_qkey; +unsigned long hack_ipoib_qkey; static int ib_driver_init(struct pci_device *pci, udqp_t * ipoib_qph_p) { @@ -149,7 +150,7 @@ static int ib_driver_init(struct pci_device *pci, udqp_t * ipoib_qph_p) qkey, mlid); } - ipoib_qkey = qkey; + hack_ipoib_qkey = ipoib_qkey = qkey; #if 0 rc = create_ipoib_qp(&ib_data.ipoib_qp, @@ -285,7 +286,7 @@ static int poll_cqe_tout(cq_t cqh, __u16 tout, void **wqe, int *good_p) end = currticks() + tout; do { - rc = ib_poll_cq(cqh, &ib_cqe, &num_cqes); + rc = ib_poll_cqx(cqh, &ib_cqe, &num_cqes); if (rc) return rc; diff --git a/src/drivers/net/mlx_ipoib/ib_driver.h b/src/drivers/net/mlx_ipoib/ib_driver.h index 6dca8d30..7fc57364 100644 --- a/src/drivers/net/mlx_ipoib/ib_driver.h +++ b/src/drivers/net/mlx_ipoib/ib_driver.h @@ -153,7 +153,7 @@ static int gw_read_cr(__u32 addr, __u32 * result); static int gw_write_cr(__u32 addr, __u32 data); static ud_av_t alloc_ud_av(void); static void free_ud_av(ud_av_t av); -static int ib_poll_cq(cq_t cq, struct ib_cqe_st *ib_cqe_p, __u8 * num_cqes); +static int ib_poll_cqx(cq_t cq, struct ib_cqe_st *ib_cqe_p, __u8 * num_cqes); static int add_qp_to_mcast_group(union ib_gid_u mcast_gid, __u8 add); static int clear_interrupt(void); static int poll_cqe_tout(cq_t cqh, __u16 tout, void **wqe, int *good_p); diff --git a/src/drivers/net/mlx_ipoib/ib_mt25218.c b/src/drivers/net/mlx_ipoib/ib_mt25218.c index ba1108a3..a5d251d4 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt25218.c +++ b/src/drivers/net/mlx_ipoib/ib_mt25218.c @@ -1730,7 +1730,7 @@ static void dev2ib_cqe(struct ib_cqe_st *ib_cqe_p, union cqe_st *cqe_p) byte_cnt); } -static int ib_poll_cq(void *cqh, struct ib_cqe_st *ib_cqe_p, u8 * num_cqes) 
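/* Note: the legacy driver's completion poller is renamed to
 * ib_poll_cqx() here, presumably so that the name ib_poll_cq() is
 * free for the generic wrapper added to include/gpxe/infiniband.h
 * by this same patch.
 */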
+static int ib_poll_cqx(void *cqh, struct ib_cqe_st *ib_cqe_p, u8 * num_cqes) { int rc; union cqe_st cqe; diff --git a/src/drivers/net/mlx_ipoib/ipoib.c b/src/drivers/net/mlx_ipoib/ipoib.c index d4124f21..d8dd6bf6 100644 --- a/src/drivers/net/mlx_ipoib/ipoib.c +++ b/src/drivers/net/mlx_ipoib/ipoib.c @@ -879,7 +879,7 @@ static int ipoib_read_packet(__u16 * prot_p, void *data, unsigned int *size_p, void *buf, *out_buf; __u16 prot_type; - rc = ib_poll_cq(ipoib_data.rcv_cqh, &ib_cqe, &num_cqes); + rc = ib_poll_cqx(ipoib_data.rcv_cqh, &ib_cqe, &num_cqes); if (rc) { return rc; } diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index aed6d208..6aa4e7fe 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -16,6 +16,7 @@ Skeleton NIC driver for Etherboot #include #include #include +#include /* to get some global routines like printf */ #include "etherboot.h" @@ -29,11 +30,18 @@ Skeleton NIC driver for Etherboot #include "arbel.h" +struct ib_address_vector hack_ipoib_bcast_av; + + + + static const struct ib_gid arbel_no_gid = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } }; +#if 0 + #define MLX_RX_MAX_FILL NUM_IPOIB_RCV_WQES struct mlx_nic { @@ -275,6 +283,7 @@ static void mlx_poll ( struct net_device *netdev ) { &static_ipoib_send_cq, #endif temp_complete_send, temp_complete_recv ); +#if 0 arbel_poll_cq ( &static_ibdev, #if CREATE_OWN mlx->own_recv_cq, @@ -282,6 +291,7 @@ static void mlx_poll ( struct net_device *netdev ) { &static_ipoib_recv_cq, #endif temp_complete_send, temp_complete_recv ); +#endif mlx_refill_rx ( netdev ); } @@ -308,6 +318,8 @@ static struct net_device_operations mlx_operations = { }; +#endif /* 0 */ + /*************************************************************************** @@ -1488,6 +1500,8 @@ static int arbel_get_port_gid ( struct arbel *arbel, struct ib_gid *gid ) { +#if 0 + /** * Probe PCI device * @@ -1576,14 +1590,17 @@ static int arbel_probe ( struct pci_device *pci, DBG ( "Could not create send CQ\n" ); return -EIO; } +#if 0 mlx->own_recv_cq = ib_create_cq ( ibdev, 32 ); if ( ! mlx->own_recv_cq ) { DBG ( "Could not create send CQ\n" ); return -EIO; } +#endif mlx->own_qp = ib_create_qp ( ibdev, NUM_IPOIB_SND_WQES, mlx->own_send_cq, NUM_IPOIB_RCV_WQES, - mlx->own_recv_cq, ipoib_qkey ); + //mlx->own_recv_cq, ipoib_qkey ); + mlx->own_send_cq, ipoib_qkey ); if ( ! mlx->own_qp ) { DBG ( "Could not create QP\n" ); return -EIO; @@ -1621,6 +1638,22 @@ static int arbel_probe ( struct pci_device *pci, } #endif + ibdev->dev = &pci->dev; + + + struct ud_av_st *bcast_av = mlx->bcast_av; + struct arbelprm_ud_address_vector *bav = + ( struct arbelprm_ud_address_vector * ) &bcast_av->av; + struct ib_address_vector *av = &hack_ipoib_bcast_av; + av->dest_qp = bcast_av->dest_qp; + av->qkey = bcast_av->qkey; + av->dlid = MLX_GET ( bav, rlid ); + av->rate = ( MLX_GET ( bav, max_stat_rate ) ? 
1 : 4 ); + av->sl = MLX_GET ( bav, sl ); + av->gid_present = 1; + memcpy ( &av->gid, ( ( void * ) bav ) + 16, 16 ); + + /* Register network device */ if ( ( rc = register_netdev ( netdev ) ) != 0 ) goto err_register_netdev; @@ -1650,6 +1683,114 @@ static void arbel_remove ( struct pci_device *pci ) { netdev_put ( netdev ); } +#endif /* 0 */ + + + +/** + * Probe PCI device + * + * @v pci PCI device + * @v id PCI ID + * @ret rc Return status code + */ +static int arbel_probe ( struct pci_device *pci, + const struct pci_device_id *id __unused ) { + struct ib_device *ibdev; + struct arbelprm_query_dev_lim dev_lim; + struct arbel *arbel; + udqp_t qph; + int rc; + + /* Allocate Infiniband device */ + ibdev = alloc_ibdev ( sizeof ( *arbel ) ); + if ( ! ibdev ) + return -ENOMEM; + ibdev->op = &arbel_ib_operations; + pci_set_drvdata ( pci, ibdev ); + ibdev->dev = &pci->dev; + arbel = ibdev->dev_priv; + memset ( arbel, 0, sizeof ( *arbel ) ); + + /* Fix up PCI device */ + adjust_pci_device ( pci ); + + /* Initialise hardware */ + if ( ( rc = ib_driver_init ( pci, &qph ) ) != 0 ) + goto err_ib_driver_init; + + /* Hack up IB structures */ + arbel->config = memfree_pci_dev.cr_space; + arbel->mailbox_in = dev_buffers_p->inprm_buf; + arbel->mailbox_out = dev_buffers_p->outprm_buf; + arbel->uar = memfree_pci_dev.uar; + arbel->db_rec = dev_ib_data.uar_context_base; + arbel->reserved_lkey = dev_ib_data.mkey; + arbel->eqn = dev_ib_data.eq.eqn; + + /* Get device limits */ + if ( ( rc = arbel_cmd_query_dev_lim ( arbel, &dev_lim ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not get device limits: %s\n", + arbel, strerror ( rc ) ); + goto err_query_dev_lim; + } + arbel->limits.reserved_uars = MLX_GET ( &dev_lim, num_rsvd_uars ); + arbel->limits.reserved_cqs = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_cqs ) ); + arbel->limits.reserved_qps = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_qps ) ); + + /* Get port GID */ + if ( ( rc = arbel_get_port_gid ( arbel, &ibdev->port_gid ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not determine port GID: %s\n", + arbel, strerror ( rc ) ); + goto err_get_port_gid; + } + + struct ud_av_st *bcast_av = ib_data.bcast_av; + struct arbelprm_ud_address_vector *bav = + ( struct arbelprm_ud_address_vector * ) &bcast_av->av; + struct ib_address_vector *av = &hack_ipoib_bcast_av; + av->dest_qp = bcast_av->dest_qp; + av->qkey = bcast_av->qkey; + av->dlid = MLX_GET ( bav, rlid ); + av->rate = ( MLX_GET ( bav, max_stat_rate ) ? 
1 : 4 ); + av->sl = MLX_GET ( bav, sl ); + av->gid_present = 1; + memcpy ( &av->gid, ( ( void * ) bav ) + 16, 16 ); + + memcpy ( &ibdev->broadcast_gid, &ib_data.bcast_gid, 16 ); + + /* Add IPoIB device */ + if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not add IPoIB device: %s\n", + arbel, strerror ( rc ) ); + goto err_ipoib_probe; + } + + return 0; + + err_ipoib_probe: + err_get_port_gid: + err_query_dev_lim: + ib_driver_close ( 0 ); + err_ib_driver_init: + free_ibdev ( ibdev ); + return rc; +} + +/** + * Remove PCI device + * + * @v pci PCI device + */ +static void arbel_remove ( struct pci_device *pci ) { + struct ib_device *ibdev = pci_get_drvdata ( pci ); + + ipoib_remove ( ibdev ); + ib_driver_close ( 0 ); +} + static struct pci_device_id arbel_nics[] = { PCI_ROM ( 0x15b3, 0x6282, "MT25218", "MT25218 HCA driver" ), PCI_ROM ( 0x15b3, 0x6274, "MT25204", "MT25204 HCA driver" ), diff --git a/src/include/gpxe/errfile.h b/src/include/gpxe/errfile.h index 3413f9cf..325d2387 100644 --- a/src/include/gpxe/errfile.h +++ b/src/include/gpxe/errfile.h @@ -101,6 +101,8 @@ #define ERRFILE_via_rhine ( ERRFILE_DRIVER | 0x00440000 ) #define ERRFILE_via_velocity ( ERRFILE_DRIVER | 0x00450000 ) #define ERRFILE_w89c840 ( ERRFILE_DRIVER | 0x00460000 ) +#define ERRFILE_ipoib ( ERRFILE_DRIVER | 0x00470000 ) +#define ERRFILE_mt25218 ( ERRFILE_DRIVER | 0x00480000 ) #define ERRFILE_scsi ( ERRFILE_DRIVER | 0x00700000 ) diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 3f09808c..e9e0121d 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -8,7 +8,43 @@ */ #include -#include +#include + + + +#if 0 +/** Infiniband MAC address length */ +#define IB_ALEN 20 + +/** An Infiniband MAC address */ +struct ib_mac { + /** Queue pair number + * + * MSB must be zero; QPNs are only 24-bit. + */ + uint32_t qpn; + /** Port GID */ + struct ib_gid gid; +} __attribute__ (( packed )); + +/** Infiniband link-layer header length */ +#define IB_HLEN 4 + +/** An Infiniband link-layer header */ +struct ibhdr { + /** Network-layer protocol */ + uint16_t proto; + /** Reserved, must be zero */ + uint16_t reserved; +} __attribute__ (( packed )); +#endif + + + + + + + /** An Infiniband Global Identifier */ struct ib_gid { @@ -36,33 +72,6 @@ struct ib_global_route_header { struct ib_gid dgid; } __attribute__ (( packed )); -/** Infiniband MAC address length */ -#define IB_ALEN 20 - -/** An Infiniband MAC address */ -struct ib_mac { - /** Queue pair number - * - * MSB must be zero; QPNs are only 24-bit. 
- */ - uint32_t qpn; - /** Port GID */ - struct ib_gid gid; -} __attribute__ (( packed )); - -/** Infiniband link-layer header length */ -#define IB_HLEN 4 - -/** An Infiniband link-layer header */ -struct ibhdr { - /** Network-layer protocol */ - uint16_t proto; - /** Reserved, must be zero */ - uint16_t reserved; -} __attribute__ (( packed )); - - - struct ib_device; struct ib_queue_pair; struct ib_completion_queue; @@ -223,8 +232,7 @@ struct ib_device_operations { struct ib_queue_pair *qp, struct ib_address_vector *av, struct io_buffer *iobuf ); - /** - * Post receive work queue entry + /** Post receive work queue entry * * @v ibdev Infiniband device * @v qp Queue pair @@ -252,8 +260,7 @@ struct ib_device_operations { struct ib_completion_queue *cq, ib_completer_t complete_send, ib_completer_t complete_recv ); - /** - * Attach to multicast group + /** Attach to multicast group * * @v ibdev Infiniband device * @v qp Queue pair @@ -263,8 +270,7 @@ struct ib_device_operations { int ( * mcast_attach ) ( struct ib_device *ibdev, struct ib_queue_pair *qp, struct ib_gid *gid ); - /** - * Detach from multicast group + /** Detach from multicast group * * @v ibdev Infiniband device * @v qp Queue pair @@ -276,13 +282,19 @@ struct ib_device_operations { }; /** An Infiniband device */ -struct ib_device { +struct ib_device { /** Port GID */ struct ib_gid port_gid; + /** Broadcast GID */ + struct ib_gid broadcast_gid; + /** Underlying device */ + struct device *dev; /** Infiniband operations */ struct ib_device_operations *op; /** Device private data */ void *dev_priv; + /** Owner private data */ + void *owner_priv; }; extern struct ib_completion_queue * ib_create_cq ( struct ib_device *ibdev, @@ -297,6 +309,52 @@ extern void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ); extern struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, unsigned long qpn, int is_send ); +extern struct ib_device * alloc_ibdev ( size_t priv_size ); +extern void free_ibdev ( struct ib_device *ibdev ); + +/** + * Post send work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v av Address vector + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static inline __attribute__ (( always_inline )) int +ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct ib_address_vector *av, struct io_buffer *iobuf ) { + return ibdev->op->post_send ( ibdev, qp, av, iobuf ); +} + +/** + * Post receive work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static inline __attribute__ (( always_inline )) int +ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp, + struct io_buffer *iobuf ) { + return ibdev->op->post_recv ( ibdev, qp, iobuf ); +} + +/** + * Poll completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + * @v complete_send Send completion handler + * @v complete_recv Receive completion handler + */ +static inline __attribute__ (( always_inline )) void +ib_poll_cq ( struct ib_device *ibdev, struct ib_completion_queue *cq, + ib_completer_t complete_send, ib_completer_t complete_recv ) { + ibdev->op->poll_cq ( ibdev, cq, complete_send, complete_recv ); +} + /** * Attach to multicast group @@ -325,6 +383,27 @@ ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp, ibdev->op->mcast_detach ( ibdev, qp, gid ); } +/** + * Set Infiniband owner-private data + * + * @v pci Infiniband device + * @v priv Private data + */ +static 
inline void ib_set_ownerdata ( struct ib_device *ibdev, + void *owner_priv ) { + ibdev->owner_priv = owner_priv; +} + +/** + * Get Infiniband owner-private data + * + * @v pci Infiniband device + * @ret priv Private data + */ +static inline void * ib_get_ownerdata ( struct ib_device *ibdev ) { + return ibdev->owner_priv; +} + /***************************************************************************** * * Management datagrams @@ -435,9 +514,7 @@ union ib_mad { - - - +#if 0 extern struct ll_protocol infiniband_protocol; @@ -459,4 +536,6 @@ static inline struct net_device * alloc_ibdev ( size_t priv_size ) { return netdev; } +#endif + #endif /* _GPXE_INFINIBAND_H */ diff --git a/src/include/gpxe/ipoib.h b/src/include/gpxe/ipoib.h new file mode 100644 index 00000000..0551687d --- /dev/null +++ b/src/include/gpxe/ipoib.h @@ -0,0 +1,78 @@ +#ifndef _GPXE_IPOIB_H +#define _GPXE_IPOIB_H + +/** @file + * + * IP over Infiniband + */ + +#include + +/** IPoIB MAC address length */ +#define IPOIB_ALEN 20 + +/** An IPoIB MAC address */ +struct ipoib_mac { + /** Queue pair number + * + * MSB must be zero; QPNs are only 24-bit. + */ + uint32_t qpn; + /** Port GID */ + struct ib_gid gid; +} __attribute__ (( packed )); + +/** IPoIB link-layer header length */ +#define IPOIB_HLEN 24 + +/** + * IPoIB link-layer header pseudo portion + * + * This part doesn't actually exist on the wire, but it provides a + * convenient way to fit into the typical network device model. + */ +struct ipoib_pseudo_hdr { + /** Peer address */ + struct ipoib_mac peer; +} __attribute__ (( packed )); + +/** IPoIB link-layer header real portion */ +struct ipoib_real_hdr { + /** Network-layer protocol */ + uint16_t proto; + /** Reserved, must be zero */ + uint16_t reserved; +} __attribute__ (( packed )); + +/** An IPoIB link-layer header */ +struct ipoib_hdr { + /** Pseudo portion */ + struct ipoib_pseudo_hdr pseudo; + /** Real portion */ + struct ipoib_real_hdr real; +} __attribute__ (( packed )); + +extern struct ll_protocol ipoib_protocol; + +extern const char * ipoib_ntoa ( const void *ll_addr ); + +/** + * Allocate IPoIB device + * + * @v priv_size Size of driver private data + * @ret netdev Network device, or NULL + */ +static inline struct net_device * alloc_ipoibdev ( size_t priv_size ) { + struct net_device *netdev; + + netdev = alloc_netdev ( priv_size ); + if ( netdev ) { + netdev->ll_protocol = &ipoib_protocol; + } + return netdev; +} + +extern int ipoib_probe ( struct ib_device *ibdev ); +extern void ipoib_remove ( struct ib_device *ibdev ); + +#endif /* _GPXE_IPOIB_H */ diff --git a/src/net/infiniband.c b/src/net/infiniband.c index a9ca0e31..7a68b7d4 100644 --- a/src/net/infiniband.c +++ b/src/net/infiniband.c @@ -153,8 +153,6 @@ void ib_destroy_qp ( struct ib_device *ibdev, free ( qp ); } - - /** * Find work queue belonging to completion queue * @@ -174,7 +172,35 @@ struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq, return NULL; } +/** + * Allocate Infiniband device + * + * @v priv_size Size of private data area + * @ret ibdev Infiniband device, or NULL + */ +struct ib_device * alloc_ibdev ( size_t priv_size ) { + struct ib_device *ibdev; + size_t total_len; + total_len = ( sizeof ( *ibdev ) + priv_size ); + ibdev = zalloc ( total_len ); + if ( ibdev ) { + ibdev->dev_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) ); + } + return ibdev; +} + +/** + * Free Infiniband device + * + * @v ibdev Infiniband device + */ +void free_ibdev ( struct ib_device *ibdev ) { + free ( ibdev ); +} + + +#if 0 /** 
Infiniband broadcast MAC address */ static uint8_t ib_broadcast[IB_ALEN] = { 0xff, }; @@ -259,3 +285,5 @@ struct ll_protocol infiniband_protocol __ll_protocol = { .rx = ib_rx, .ntoa = ib_ntoa, }; + +#endif From 440e7926fbfc419115a85b61d3c740f640b68756 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 05:12:47 +0100 Subject: [PATCH 50/84] Dead code removal --- src/drivers/net/mlx_ipoib/mt25218.c | 475 +--------------------------- src/include/gpxe/infiniband.h | 63 ---- 2 files changed, 5 insertions(+), 533 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 6aa4e7fe..da33e97b 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -35,291 +35,8 @@ struct ib_address_vector hack_ipoib_bcast_av; -static const struct ib_gid arbel_no_gid = { - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } -}; -#if 0 - -#define MLX_RX_MAX_FILL NUM_IPOIB_RCV_WQES - -struct mlx_nic { -#if ! CREATE_OWN - /** Queue pair handle */ - udqp_t ipoib_qph; - /** Send completion queue */ - cq_t snd_cqh; - /** Receive completion queue */ - cq_t rcv_cqh; -#endif - /** Broadcast Address Vector */ - ud_av_t bcast_av; - - /** RX fill level */ - unsigned int rx_fill; - -#if CREATE_OWN - struct ib_completion_queue *own_send_cq; - struct ib_completion_queue *own_recv_cq; - struct ib_queue_pair *own_qp; -#endif -}; - - -static struct io_buffer *static_ipoib_tx_ring[NUM_IPOIB_SND_WQES]; -static struct io_buffer *static_ipoib_rx_ring[NUM_IPOIB_RCV_WQES]; - -static struct arbel static_arbel; - -#if ! CREATE_OWN - -static struct arbel_completion_queue static_arbel_ipoib_send_cq = { - .ci_doorbell_idx = IPOIB_SND_CQ_CI_DB_IDX, -}; -static struct ib_completion_queue static_ipoib_send_cq = { - .cqn = 1234, /* Only used for debug messages */ - .num_cqes = NUM_IPOIB_SND_CQES, - .work_queues = LIST_HEAD_INIT ( static_ipoib_send_cq.work_queues ), - .dev_priv = &static_arbel_ipoib_send_cq, -}; - -static struct arbel_completion_queue static_arbel_ipoib_recv_cq = { - .ci_doorbell_idx = IPOIB_RCV_CQ_CI_DB_IDX, -}; -static struct ib_completion_queue static_ipoib_recv_cq = { - .cqn = 2345, /* Only used for debug messages */ - .num_cqes = NUM_IPOIB_RCV_CQES, - .work_queues = LIST_HEAD_INIT ( static_ipoib_recv_cq.work_queues ), - .dev_priv = &static_arbel_ipoib_recv_cq, -}; - -static struct arbel_queue_pair static_arbel_ipoib_qp = { - .send = { - .doorbell_idx = IPOIB_SND_QP_DB_IDX, - }, - .recv = { - .doorbell_idx = IPOIB_RCV_QP_DB_IDX, - }, -}; -static struct ib_queue_pair static_ipoib_qp = { - .send = { - .qp = &static_ipoib_qp, - .is_send = 1, - .cq = &static_ipoib_send_cq, - .num_wqes = NUM_IPOIB_SND_WQES, - .iobufs = static_ipoib_tx_ring, - .list = LIST_HEAD_INIT (static_ipoib_qp.send.list), - .dev_priv = &static_arbel_ipoib_qp.send, - }, - .recv = { - .qp = &static_ipoib_qp, - .is_send = 0, - .cq = &static_ipoib_recv_cq, - .num_wqes = NUM_IPOIB_RCV_WQES, - .iobufs = static_ipoib_rx_ring, - .list = LIST_HEAD_INIT (static_ipoib_qp.recv.list), - .dev_priv = &static_arbel_ipoib_qp.recv, - }, - .dev_priv = &static_arbel_ipoib_qp, -}; - -#endif - - -static struct ib_device static_ibdev = { - .dev_priv = &static_arbel, -}; - - -/** - * Open network device - * - * @v netdev Network device - * @ret rc Return status code - */ -static int mlx_open ( struct net_device *netdev ) { - - ( void ) netdev; - - return 0; -} - -/** - * Close network device - * - * @v netdev Network device - */ -static void mlx_close ( struct net_device *netdev ) { - - 
( void ) netdev; - -} - -static int arbel_post_send ( struct ib_device *ibdev, - struct ib_queue_pair *qp, - struct ib_address_vector *av, - struct io_buffer *iobuf ); - -static int mlx_transmit_direct ( struct net_device *netdev, - struct io_buffer *iobuf ) { - struct mlx_nic *mlx = netdev->priv; - int rc; - - struct ud_av_st *bcast_av = mlx->bcast_av; - struct arbelprm_ud_address_vector *bav = - ( struct arbelprm_ud_address_vector * ) &bcast_av->av; - struct ib_address_vector av = { - .dest_qp = bcast_av->dest_qp, - .qkey = bcast_av->qkey, - .dlid = MLX_GET ( bav, rlid ), - .rate = ( MLX_GET ( bav, max_stat_rate ) ? 1 : 4 ), - .sl = MLX_GET ( bav, sl ), - .gid_present = 1, - }; - memcpy ( &av.gid, ( ( void * ) bav ) + 16, 16 ); - - rc = arbel_post_send ( &static_ibdev, -#if CREATE_OWN - mlx->own_qp, -#else - &static_ipoib_qp, -#endif - &av, iobuf ); - - return rc; -} - -static void arbel_poll_cq ( struct ib_device *ibdev, - struct ib_completion_queue *cq, - ib_completer_t complete_send, - ib_completer_t complete_recv ); - -static void temp_complete_send ( struct ib_device *ibdev __unused, - struct ib_queue_pair *qp, - struct ib_completion *completion, - struct io_buffer *iobuf ) { - struct net_device *netdev = qp->owner_priv; - - DBG ( "Wahey! TX completion\n" ); - netdev_tx_complete_err ( netdev, iobuf, - ( completion->syndrome ? -EIO : 0 ) ); -} - -static void temp_complete_recv ( struct ib_device *ibdev __unused, - struct ib_queue_pair *qp, - struct ib_completion *completion, - struct io_buffer *iobuf ) { - struct net_device *netdev = qp->owner_priv; - struct mlx_nic *mlx = netdev->priv; - - DBG ( "Yay! RX completion on %p len %zx:\n", iobuf, completion->len ); - if ( completion->syndrome ) { - netdev_rx_err ( netdev, iobuf, -EIO ); - } else { - iob_put ( iobuf, completion->len ); - iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) ); - netdev_rx ( netdev, iobuf ); - } - - mlx->rx_fill--; -} - -static int arbel_post_recv ( struct ib_device *ibdev, - struct ib_queue_pair *qp, - struct io_buffer *iobuf ); - -static void mlx_refill_rx ( struct net_device *netdev ) { - struct mlx_nic *mlx = netdev->priv; - struct io_buffer *iobuf; - int rc; - - while ( mlx->rx_fill < MLX_RX_MAX_FILL ) { - iobuf = alloc_iob ( 2048 ); - if ( ! iobuf ) - break; - DBG ( "Posting RX buffer %p:\n", iobuf ); - if ( ( rc = arbel_post_recv ( &static_ibdev, -#if CREATE_OWN - mlx->own_qp, -#else - &static_ipoib_qp, -#endif - iobuf ) ) != 0 ) { - free_iob ( iobuf ); - break; - } - mlx->rx_fill++; - } -} - -/** - * Poll for completed and received packets - * - * @v netdev Network device - */ -static void mlx_poll ( struct net_device *netdev ) { - struct mlx_nic *mlx = netdev->priv; - int rc; - - if ( ( rc = poll_error_buf() ) != 0 ) { - DBG ( "poll_error_buf() failed: %s\n", strerror ( rc ) ); - return; - } - - /* Drain event queue. We can ignore events, since we're going - * to just poll all completion queues anyway. 
- */ - if ( ( rc = drain_eq() ) != 0 ) { - DBG ( "drain_eq() failed: %s\n", strerror ( rc ) ); - return; - } - - /* Poll completion queues */ - arbel_poll_cq ( &static_ibdev, -#if CREATE_OWN - mlx->own_send_cq, -#else - &static_ipoib_send_cq, -#endif - temp_complete_send, temp_complete_recv ); -#if 0 - arbel_poll_cq ( &static_ibdev, -#if CREATE_OWN - mlx->own_recv_cq, -#else - &static_ipoib_recv_cq, -#endif - temp_complete_send, temp_complete_recv ); -#endif - - mlx_refill_rx ( netdev ); -} - -/** - * Enable or disable interrupts - * - * @v netdev Network device - * @v enable Interrupts should be enabled - */ -static void mlx_irq ( struct net_device *netdev, int enable ) { - - ( void ) netdev; - ( void ) enable; - -} - -static struct net_device_operations mlx_operations = { - .open = mlx_open, - .close = mlx_close, - .transmit = mlx_transmit_direct, - .poll = mlx_poll, - .irq = mlx_irq, -}; - - -#endif /* 0 */ - /*************************************************************************** @@ -1030,6 +747,11 @@ static void arbel_ring_doorbell ( struct arbel *arbel, writel ( db_reg->dword[1], ( arbel->uar + offset + 4 ) ); } +/** GID used for GID-less send work queue entries */ +static const struct ib_gid arbel_no_gid = { + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } +}; + /** * Post send work queue entry * @@ -1500,193 +1222,6 @@ static int arbel_get_port_gid ( struct arbel *arbel, struct ib_gid *gid ) { -#if 0 - -/** - * Probe PCI device - * - * @v pci PCI device - * @v id PCI ID - * @ret rc Return status code - */ -static int arbel_probe ( struct pci_device *pci, - const struct pci_device_id *id __unused ) { - struct net_device *netdev; - struct arbelprm_query_dev_lim dev_lim; - struct arbel *arbel = &static_arbel; - struct mlx_nic *mlx; - struct ib_mac *mac; - udqp_t qph; - int rc; - - /* Allocate net device */ - netdev = alloc_ibdev ( sizeof ( *mlx ) ); - if ( ! netdev ) - return -ENOMEM; - netdev_init ( netdev, &mlx_operations ); - mlx = netdev->priv; - pci_set_drvdata ( pci, netdev ); - netdev->dev = &pci->dev; - memset ( mlx, 0, sizeof ( *mlx ) ); - - /* Fix up PCI device */ - adjust_pci_device ( pci ); - - /* Initialise hardware */ - if ( ( rc = ib_driver_init ( pci, &qph ) ) != 0 ) - goto err_ipoib_init; - mlx->bcast_av = ib_data.bcast_av; -#if ! CREATE_OWN - mlx->ipoib_qph = qph; - mlx->snd_cqh = ib_data.ipoib_snd_cq; - mlx->rcv_cqh = ib_data.ipoib_rcv_cq; - mac = ( ( struct ib_mac * ) netdev->ll_addr ); - mac->qpn = htonl ( ib_get_qpn ( mlx->ipoib_qph ) ); - memcpy ( &mac->gid, ib_data.port_gid.raw, sizeof ( mac->gid ) ); -#endif - - /* Hack up IB structures */ - arbel->config = memfree_pci_dev.cr_space; - arbel->mailbox_in = dev_buffers_p->inprm_buf; - arbel->mailbox_out = dev_buffers_p->outprm_buf; - arbel->uar = memfree_pci_dev.uar; - arbel->db_rec = dev_ib_data.uar_context_base; - arbel->reserved_lkey = dev_ib_data.mkey; - arbel->eqn = dev_ib_data.eq.eqn; -#if ! 
CREATE_OWN - static_arbel_ipoib_qp.send.wqe = - ( ( struct udqp_st * ) qph )->snd_wq; - static_arbel_ipoib_qp.recv.wqe = - ( ( struct udqp_st * ) qph )->rcv_wq; - static_arbel_ipoib_send_cq.cqe = - ( ( struct cq_st * ) ib_data.ipoib_snd_cq )->cq_buf; - static_arbel_ipoib_recv_cq.cqe = - ( ( struct cq_st * ) ib_data.ipoib_rcv_cq )->cq_buf; - static_ipoib_qp.qpn = ib_get_qpn ( qph ); - static_ipoib_qp.owner_priv = netdev; - list_add ( &static_ipoib_qp.send.list, - &static_ipoib_send_cq.work_queues ); - list_add ( &static_ipoib_qp.recv.list, - &static_ipoib_recv_cq.work_queues ); -#endif - static_ibdev.op = &arbel_ib_operations; - - /* Get device limits */ - if ( ( rc = arbel_cmd_query_dev_lim ( arbel, &dev_lim ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not get device limits: %s\n", - arbel, strerror ( rc ) ); - goto err_query_dev_lim; - } - arbel->limits.reserved_uars = MLX_GET ( &dev_lim, num_rsvd_uars ); - arbel->limits.reserved_cqs = - ( 1 << MLX_GET ( &dev_lim, log2_rsvd_cqs ) ); - arbel->limits.reserved_qps = - ( 1 << MLX_GET ( &dev_lim, log2_rsvd_qps ) ); - -#if CREATE_OWN - struct ib_device *ibdev = &static_ibdev; - mlx->own_send_cq = ib_create_cq ( ibdev, 32 ); - if ( ! mlx->own_send_cq ) { - DBG ( "Could not create send CQ\n" ); - return -EIO; - } -#if 0 - mlx->own_recv_cq = ib_create_cq ( ibdev, 32 ); - if ( ! mlx->own_recv_cq ) { - DBG ( "Could not create send CQ\n" ); - return -EIO; - } -#endif - mlx->own_qp = ib_create_qp ( ibdev, NUM_IPOIB_SND_WQES, - mlx->own_send_cq, NUM_IPOIB_RCV_WQES, - //mlx->own_recv_cq, ipoib_qkey ); - mlx->own_send_cq, ipoib_qkey ); - if ( ! mlx->own_qp ) { - DBG ( "Could not create QP\n" ); - return -EIO; - } - mlx->own_qp->owner_priv = netdev; - struct ib_gid *bcast_gid = ( struct ib_gid * ) &ib_data.bcast_gid; - if ( ( rc = ib_mcast_attach ( ibdev, mlx->own_qp, - bcast_gid ) ) != 0 ) { - DBG ( "Could not attach to broadcast GID: %s\n", - strerror ( rc ) ); - return rc; - } - - if ( ( rc = arbel_get_port_gid ( arbel, &ibdev->port_gid ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not determine port GID: %s\n", - arbel, strerror ( rc ) ); - return rc; - } - - DBG ( "Port GID:\n" ); - DBG_HD ( &ibdev->port_gid, sizeof ( ibdev->port_gid ) ); - - - mac = ( ( struct ib_mac * ) netdev->ll_addr ); - mac->qpn = htonl ( mlx->own_qp->qpn ); - memcpy ( &mac->gid, &ibdev->port_gid, sizeof ( mac->gid ) ); -#endif - -#if 0 - DBG ( "MADS SND CQN = %#lx\n", dev_ib_data.mads_qp.snd_cq.cqn ); - struct ib_completion_queue *test_cq; - test_cq = ib_create_cq ( &static_ibdev, 32 ); - if ( test_cq ) { - DBG ( "Woot: create_cq() passed!\n" ); - } -#endif - - ibdev->dev = &pci->dev; - - - struct ud_av_st *bcast_av = mlx->bcast_av; - struct arbelprm_ud_address_vector *bav = - ( struct arbelprm_ud_address_vector * ) &bcast_av->av; - struct ib_address_vector *av = &hack_ipoib_bcast_av; - av->dest_qp = bcast_av->dest_qp; - av->qkey = bcast_av->qkey; - av->dlid = MLX_GET ( bav, rlid ); - av->rate = ( MLX_GET ( bav, max_stat_rate ) ? 
1 : 4 ); - av->sl = MLX_GET ( bav, sl ); - av->gid_present = 1; - memcpy ( &av->gid, ( ( void * ) bav ) + 16, 16 ); - - - /* Register network device */ - if ( ( rc = register_netdev ( netdev ) ) != 0 ) - goto err_register_netdev; - - return 0; - - err_query_dev_lim: - err_register_netdev: - err_ipoib_init: - ib_driver_close ( 0 ); - netdev_nullify ( netdev ); - netdev_put ( netdev ); - return rc; -} - -/** - * Remove PCI device - * - * @v pci PCI device - */ -static void arbel_remove ( struct pci_device *pci ) { - struct net_device *netdev = pci_get_drvdata ( pci ); - - unregister_netdev ( netdev ); - ib_driver_close ( 0 ); - netdev_nullify ( netdev ); - netdev_put ( netdev ); -} - -#endif /* 0 */ - - - /** * Probe PCI device * diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index e9e0121d..236b2727 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -10,42 +10,6 @@ #include #include - - -#if 0 -/** Infiniband MAC address length */ -#define IB_ALEN 20 - -/** An Infiniband MAC address */ -struct ib_mac { - /** Queue pair number - * - * MSB must be zero; QPNs are only 24-bit. - */ - uint32_t qpn; - /** Port GID */ - struct ib_gid gid; -} __attribute__ (( packed )); - -/** Infiniband link-layer header length */ -#define IB_HLEN 4 - -/** An Infiniband link-layer header */ -struct ibhdr { - /** Network-layer protocol */ - uint16_t proto; - /** Reserved, must be zero */ - uint16_t reserved; -} __attribute__ (( packed )); -#endif - - - - - - - - /** An Infiniband Global Identifier */ struct ib_gid { uint8_t bytes[16]; @@ -511,31 +475,4 @@ union ib_mad { struct ib_mad_port_info port_info; } __attribute__ (( packed )); - - - -#if 0 - -extern struct ll_protocol infiniband_protocol; - -extern const char * ib_ntoa ( const void *ll_addr ); - -/** - * Allocate Infiniband device - * - * @v priv_size Size of driver private data - * @ret netdev Network device, or NULL - */ -static inline struct net_device * alloc_ibdev ( size_t priv_size ) { - struct net_device *netdev; - - netdev = alloc_netdev ( priv_size ); - if ( netdev ) { - netdev->ll_protocol = &infiniband_protocol; - } - return netdev; -} - -#endif - #endif /* _GPXE_INFINIBAND_H */ From f6f1f2b7bbb5e126f337c15b2e10ba0aeaf287fe Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 06:12:33 +0100 Subject: [PATCH 51/84] Prepare for adding a metadata queue to IPoIB --- src/drivers/net/ipoib.c | 171 ++++++++++++++++++++++++++++------------ src/net/infiniband.c | 91 +-------------------- 2 files changed, 122 insertions(+), 140 deletions(-) diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c index 9eed6b39..2d351e4b 100644 --- a/src/drivers/net/ipoib.c +++ b/src/drivers/net/ipoib.c @@ -45,19 +45,36 @@ extern struct ib_address_vector hack_ipoib_bcast_av; #define IPOIB_MTU 2048 /** Number of IPoIB send work queue entries */ -#define IPOIB_NUM_SEND_WQES 8 +#define IPOIB_DATA_NUM_SEND_WQES 4 /** Number of IPoIB receive work queue entries */ -#define IPOIB_NUM_RECV_WQES 8 +#define IPOIB_DATA_NUM_RECV_WQES 8 /** Number of IPoIB completion entries */ -#define IPOIB_NUM_CQES 8 +#define IPOIB_DATA_NUM_CQES 8 -struct ipoib_device { - struct ib_device *ibdev; +/** An IPoIB queue set */ +struct ipoib_queue_set { + /** Completion queue */ struct ib_completion_queue *cq; + /** Queue pair */ struct ib_queue_pair *qp; - unsigned int rx_fill; + /** Receive work queue fill level */ + unsigned int recv_fill; + /** Receive work queue maximum fill level */ + unsigned int recv_max_fill; +}; + 
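/* A queue set bundles the completion queue, queue pair and receive
 * fill state that previously lived directly in struct ipoib_device.
 * The point of the split (per the patch subject) is that a second,
 * "metadata" set can later be brought up beside the data set,
 * presumably for control traffic such as multicast membership.
 * The sketch below shows how both sets might be created with the
 * ipoib_create_qset() helper added further down in this patch; the
 * IPOIB_META_* depths and the metadata Q_Key are illustrative
 * assumptions, not values taken from this series.
 */

#define IPOIB_META_NUM_SEND_WQES 2	/* assumed */
#define IPOIB_META_NUM_RECV_WQES 2	/* assumed */
#define IPOIB_META_NUM_CQES 8		/* assumed */
#define IPOIB_META_QKEY 0x80010000UL	/* well-known GSI Q_Key; assumed */

static int ipoib_create_qsets ( struct ipoib_device *ipoib ) {
	int rc;

	/* Data queue set, exactly as ipoib_probe() creates it below */
	if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data,
					IPOIB_DATA_NUM_CQES,
					IPOIB_DATA_NUM_SEND_WQES,
					IPOIB_DATA_NUM_RECV_WQES,
					hack_ipoib_qkey ) ) != 0 )
		return rc;

	/* Metadata queue set; destroy the data set on failure so the
	 * caller sees all-or-nothing behaviour.
	 */
	if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->meta,
					IPOIB_META_NUM_CQES,
					IPOIB_META_NUM_SEND_WQES,
					IPOIB_META_NUM_RECV_WQES,
					IPOIB_META_QKEY ) ) != 0 ) {
		ipoib_destroy_qset ( ipoib, &ipoib->data );
		return rc;
	}

	return 0;
}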
+/** An IPoIB device */ +struct ipoib_device { + /** Network device */ + struct net_device *netdev; + /** Underlying Infiniband device */ + struct ib_device *ibdev; + /** Data queue set */ + struct ipoib_queue_set data; + /** Data queue set */ + struct ipoib_queue_set meta; }; /**************************************************************************** @@ -164,6 +181,69 @@ struct ll_protocol ipoib_protocol __ll_protocol = { **************************************************************************** */ +/** + * Destroy queue set + * + * @v ipoib IPoIB device + * @v qset Queue set + */ +static void ipoib_destroy_qset ( struct ipoib_device *ipoib, + struct ipoib_queue_set *qset ) { + struct ib_device *ibdev = ipoib->ibdev; + + if ( qset->qp ) + ib_destroy_qp ( ibdev, qset->qp ); + if ( qset->cq ) + ib_destroy_cq ( ibdev, qset->cq ); + memset ( qset, 0, sizeof ( *qset ) ); +} + +/** + * Create queue set + * + * @v ipoib IPoIB device + * @v qset Queue set + * @ret rc Return status code + */ +static int ipoib_create_qset ( struct ipoib_device *ipoib, + struct ipoib_queue_set *qset, + unsigned int num_cqes, + unsigned int num_send_wqes, + unsigned int num_recv_wqes, + unsigned long qkey ) { + struct ib_device *ibdev = ipoib->ibdev; + int rc; + + /* Store queue parameters */ + qset->recv_max_fill = num_recv_wqes; + + /* Allocate completion queue */ + qset->cq = ib_create_cq ( ibdev, num_cqes ); + if ( ! qset->cq ) { + DBGC ( ipoib, "IPoIB %p could not allocate completion queue\n", + ipoib ); + rc = -ENOMEM; + goto err; + } + + /* Allocate queue pair */ + qset->qp = ib_create_qp ( ibdev, num_send_wqes, qset->cq, + num_recv_wqes, qset->cq, qkey ); + if ( ! qset->qp ) { + DBGC ( ipoib, "IPoIB %p could not allocate queue pair\n", + ipoib ); + rc = -ENOMEM; + goto err; + } + qset->qp->owner_priv = ipoib->netdev; + + return 0; + + err: + ipoib_destroy_qset ( ipoib, qset ); + return rc; +} + /** * Transmit packet via IPoIB network device * @@ -183,7 +263,7 @@ static int ipoib_transmit ( struct net_device *netdev, } iob_pull ( iobuf, ( sizeof ( *ipoib_pshdr ) ) ); - return ib_post_send ( ibdev, ipoib->qp, + return ib_post_send ( ibdev, ipoib->data.qp, &hack_ipoib_bcast_av, iobuf ); } @@ -195,10 +275,10 @@ static int ipoib_transmit ( struct net_device *netdev, * @v completion Completion * @v iobuf I/O buffer */ -static void ipoib_complete_send ( struct ib_device *ibdev __unused, - struct ib_queue_pair *qp, - struct ib_completion *completion, - struct io_buffer *iobuf ) { +static void ipoib_data_complete_send ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { struct net_device *netdev = qp->owner_priv; netdev_tx_complete_err ( netdev, iobuf, @@ -213,10 +293,10 @@ static void ipoib_complete_send ( struct ib_device *ibdev __unused, * @v completion Completion * @v iobuf I/O buffer */ -static void ipoib_complete_recv ( struct ib_device *ibdev __unused, - struct ib_queue_pair *qp, - struct ib_completion *completion, - struct io_buffer *iobuf ) { +static void ipoib_data_complete_recv ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { struct net_device *netdev = qp->owner_priv; struct ipoib_device *ipoib = netdev->priv; struct ib_global_route_header *grh = iobuf->data; @@ -232,7 +312,7 @@ static void ipoib_complete_recv ( struct ib_device *ibdev __unused, netdev_rx ( netdev, iobuf ); } - ipoib->rx_fill--; + ipoib->data.recv_fill--; } /** @@ -240,21 +320,21 
@@ static void ipoib_complete_recv ( struct ib_device *ibdev __unused, * * @v ipoib IPoIB device */ -static void ipoib_refill_recv ( struct ipoib_device *ipoib ) { +static void ipoib_refill_recv ( struct ipoib_device *ipoib, + struct ipoib_queue_set *qset ) { struct ib_device *ibdev = ipoib->ibdev; struct io_buffer *iobuf; int rc; - while ( ipoib->rx_fill < IPOIB_NUM_RECV_WQES ) { + while ( qset->recv_fill < qset->recv_max_fill ) { iobuf = alloc_iob ( IPOIB_MTU ); if ( ! iobuf ) break; - if ( ( rc = ib_post_recv ( ibdev, ipoib->qp, - iobuf ) ) != 0 ) { + if ( ( rc = ib_post_recv ( ibdev, qset->qp, iobuf ) ) != 0 ) { free_iob ( iobuf ); break; } - ipoib->rx_fill++; + qset->recv_fill++; } } @@ -267,9 +347,9 @@ static void ipoib_poll ( struct net_device *netdev ) { struct ipoib_device *ipoib = netdev->priv; struct ib_device *ibdev = ipoib->ibdev; - ib_poll_cq ( ibdev, ipoib->cq, ipoib_complete_send, - ipoib_complete_recv ); - ipoib_refill_recv ( ipoib ); + ib_poll_cq ( ibdev, ipoib->data.cq, ipoib_data_complete_send, + ipoib_data_complete_recv ); + ipoib_refill_recv ( ipoib, &ipoib->data ); } /** @@ -295,7 +375,7 @@ static int ipoib_open ( struct net_device *netdev ) { int rc; /* Attach to broadcast multicast GID */ - if ( ( rc = ib_mcast_attach ( ibdev, ipoib->qp, + if ( ( rc = ib_mcast_attach ( ibdev, ipoib->data.qp, &ibdev->broadcast_gid ) ) != 0 ) { DBG ( "Could not attach to broadcast GID: %s\n", strerror ( rc ) ); @@ -303,7 +383,7 @@ static int ipoib_open ( struct net_device *netdev ) { } /* Fill receive ring */ - ipoib_refill_recv ( ipoib ); + ipoib_refill_recv ( ipoib, &ipoib->data ); return 0; } @@ -318,7 +398,7 @@ static void ipoib_close ( struct net_device *netdev ) { struct ib_device *ibdev = ipoib->ibdev; /* Detach from broadcast multicast GID */ - ib_mcast_detach ( ibdev, ipoib->qp, &ipoib_broadcast.gid ); + ib_mcast_detach ( ibdev, ipoib->data.qp, &ipoib_broadcast.gid ); /* FIXME: should probably flush the receive ring */ } @@ -353,32 +433,23 @@ int ipoib_probe ( struct ib_device *ibdev ) { ib_set_ownerdata ( ibdev, netdev ); netdev->dev = ibdev->dev; memset ( ipoib, 0, sizeof ( *ipoib ) ); + ipoib->netdev = netdev; ipoib->ibdev = ibdev; - /* Allocate completion queue */ - ipoib->cq = ib_create_cq ( ibdev, IPOIB_NUM_CQES ); - if ( ! ipoib->cq ) { - DBGC ( ipoib, "IPoIB %p could not allocate completion queue\n", - ipoib ); - rc = -ENOMEM; - goto err_create_cq; + /* Allocate data queue set */ + if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data, + IPOIB_DATA_NUM_CQES, + IPOIB_DATA_NUM_SEND_WQES, + IPOIB_DATA_NUM_RECV_WQES, + hack_ipoib_qkey ) ) != 0 ) { + DBGC ( ipoib, "IPoIB %p could not allocate data QP: %s\n", + ipoib, strerror ( rc ) ); + goto err_create_data_qset; } - /* Allocate queue pair */ - ipoib->qp = ib_create_qp ( ibdev, IPOIB_NUM_SEND_WQES, - ipoib->cq, IPOIB_NUM_RECV_WQES, - ipoib->cq, hack_ipoib_qkey ); - if ( ! 
ipoib->qp ) { - DBGC ( ipoib, "IPoIB %p could not allocate queue pair\n", - ipoib ); - rc = -ENOMEM; - goto err_create_qp; - } - ipoib->qp->owner_priv = netdev; - /* Construct MAC address */ mac = ( ( struct ipoib_mac * ) netdev->ll_addr ); - mac->qpn = htonl ( ipoib->qp->qpn ); + mac->qpn = htonl ( ipoib->data.qp->qpn ); memcpy ( &mac->gid, &ibdev->port_gid, sizeof ( mac->gid ) ); /* Register network device */ @@ -388,10 +459,8 @@ int ipoib_probe ( struct ib_device *ibdev ) { return 0; err_register_netdev: - ib_destroy_qp ( ibdev, ipoib->qp ); - err_create_qp: - ib_destroy_cq ( ibdev, ipoib->cq ); - err_create_cq: + ipoib_destroy_qset ( ipoib, &ipoib->data ); + err_create_data_qset: netdev_nullify ( netdev ); netdev_put ( netdev ); return rc; diff --git a/src/net/infiniband.c b/src/net/infiniband.c index 7a68b7d4..9d38767f 100644 --- a/src/net/infiniband.c +++ b/src/net/infiniband.c @@ -150,6 +150,8 @@ void ib_destroy_qp ( struct ib_device *ibdev, DBGC ( ibdev, "IBDEV %p destroying queue pair %#lx\n", ibdev, qp->qpn ); ibdev->op->destroy_qp ( ibdev, qp ); + list_del ( &qp->send.list ); + list_del ( &qp->recv.list ); free ( qp ); } @@ -198,92 +200,3 @@ struct ib_device * alloc_ibdev ( size_t priv_size ) { void free_ibdev ( struct ib_device *ibdev ) { free ( ibdev ); } - - -#if 0 - -/** Infiniband broadcast MAC address */ -static uint8_t ib_broadcast[IB_ALEN] = { 0xff, }; - -/** - * Transmit Infiniband packet - * - * @v iobuf I/O buffer - * @v netdev Network device - * @v net_protocol Network-layer protocol - * @v ll_dest Link-layer destination address - * - * Prepends the Infiniband link-layer header and transmits the packet. - */ -static int ib_tx ( struct io_buffer *iobuf, struct net_device *netdev, - struct net_protocol *net_protocol, const void *ll_dest ) { - struct ibhdr *ibhdr = iob_push ( iobuf, sizeof ( *ibhdr ) ); - - /* Build Infiniband header */ - ibhdr->proto = net_protocol->net_proto; - ibhdr->reserved = 0; - - ( void ) ll_dest; - - /* Hand off to network device */ - return netdev_tx ( netdev, iobuf ); -} - -/** - * Process received Infiniband packet - * - * @v iobuf I/O buffer - * @v netdev Network device - * - * Strips off the Infiniband link-layer header and passes up to the - * network-layer protocol. 
- */ -static int ib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) { - struct ibhdr *ibhdr = iobuf->data; - - /* Sanity check */ - if ( iob_len ( iobuf ) < sizeof ( *ibhdr ) ) { - DBG ( "Infiniband packet too short (%d bytes)\n", - iob_len ( iobuf ) ); - free_iob ( iobuf ); - return -EINVAL; - } - - /* Strip off Infiniband header */ - iob_pull ( iobuf, sizeof ( *ibhdr ) ); - - /* Hand off to network-layer protocol */ - return net_rx ( iobuf, netdev, ibhdr->proto, NULL ); -} - -/** - * Transcribe Infiniband address - * - * @v ll_addr Link-layer address - * @ret string Link-layer address in human-readable format - */ -const char * ib_ntoa ( const void *ll_addr ) { - static char buf[61]; - const uint8_t *ib_addr = ll_addr; - unsigned int i; - char *p = buf; - - for ( i = 0 ; i < IB_ALEN ; i++ ) { - p += sprintf ( p, ":%02x", ib_addr[i] ); - } - return ( buf + 1 ); -} - -/** Infiniband protocol */ -struct ll_protocol infiniband_protocol __ll_protocol = { - .name = "Infiniband", - .ll_proto = htons ( ARPHRD_INFINIBAND ), - .ll_addr_len = IB_ALEN, - .ll_header_len = IB_HLEN, - .ll_broadcast = ib_broadcast, - .tx = ib_tx, - .rx = ib_rx, - .ntoa = ib_ntoa, -}; - -#endif From e05a8cd4deb75ae145160cddf146daba0a17ced4 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 06:35:21 +0100 Subject: [PATCH 52/84] Use pkey table access to determine broadcast GID directly. --- src/drivers/net/mlx_ipoib/mt25218.c | 75 +++++++++++++++++++++++++---- src/include/gpxe/infiniband.h | 8 +++ 2 files changed, 73 insertions(+), 10 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index da33e97b..08207819 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -1206,21 +1206,69 @@ static int arbel_get_guid_info ( struct arbel *arbel, return 0; } -static int arbel_get_port_gid ( struct arbel *arbel, struct ib_gid *gid ) { - struct ib_mad_port_info port_info; - struct ib_mad_guid_info guid_info; +static int arbel_get_pkey_table ( struct arbel *arbel, + struct ib_mad_pkey_table *pkey_table ) { + union arbelprm_mad mad; + struct ib_mad_hdr *hdr = &mad.mad.mad_hdr; int rc; - if ( ( rc = arbel_get_port_info ( arbel, &port_info ) ) != 0 ) + memset ( &mad, 0, sizeof ( mad ) ); + hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + hdr->class_version = 1; + hdr->method = IB_MGMT_METHOD_GET; + hdr->attr_id = htons ( IB_SMP_ATTR_PKEY_TABLE ); + if ( ( rc = arbel_mad_ifc ( arbel, &mad ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not get pkey table: %s\n", + arbel, strerror ( rc ) ); return rc; - if ( ( rc = arbel_get_guid_info ( arbel, &guid_info ) ) != 0 ) - return rc; - memcpy ( &gid->bytes[0], port_info.gid_prefix, 8 ); - memcpy ( &gid->bytes[8], guid_info.gid_local, 8 ); + } + memcpy ( pkey_table, &mad.mad.pkey_table, sizeof ( *pkey_table ) ); return 0; } +static int arbel_get_port_gid ( struct arbel *arbel, + struct ib_gid *port_gid ) { + union { + /* This union exists just to save stack space */ + struct ib_mad_port_info port_info; + struct ib_mad_guid_info guid_info; + } u; + int rc; + /* Port info gives us the first half of the port GID */ + if ( ( rc = arbel_get_port_info ( arbel, &u.port_info ) ) != 0 ) + return rc; + memcpy ( &port_gid->bytes[0], u.port_info.gid_prefix, 8 ); + + /* GUID info gives us the second half of the port GID */ + if ( ( rc = arbel_get_guid_info ( arbel, &u.guid_info ) ) != 0 ) + return rc; + memcpy ( &port_gid->bytes[8], u.guid_info.gid_local, 8 ); + + return 0; +} + +static int 
arbel_get_broadcast_gid ( struct arbel *arbel, + struct ib_gid *broadcast_gid ) { + static const struct ib_gid ipv4_broadcast_gid = { + { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } + }; + struct ib_mad_pkey_table pkey_table; + int rc; + + /* Start with the IPv4 broadcast GID */ + memcpy ( broadcast_gid, &ipv4_broadcast_gid, + sizeof ( *broadcast_gid ) ); + + /* Add partition key */ + if ( ( rc = arbel_get_pkey_table ( arbel, &pkey_table ) ) != 0 ) + return rc; + memcpy ( &broadcast_gid->bytes[4], &pkey_table.pkey[0][0], + sizeof ( pkey_table.pkey[0][0] ) ); + + return 0; +} /** * Probe PCI device @@ -1282,6 +1330,14 @@ static int arbel_probe ( struct pci_device *pci, goto err_get_port_gid; } + /* Get broadcast GID */ + if ( ( rc = arbel_get_broadcast_gid ( arbel, + &ibdev->broadcast_gid ) ) != 0 ){ + DBGC ( arbel, "Arbel %p could not determine broadcast GID: " + "%s\n", arbel, strerror ( rc ) ); + goto err_get_broadcast_gid; + } + struct ud_av_st *bcast_av = ib_data.bcast_av; struct arbelprm_ud_address_vector *bav = ( struct arbelprm_ud_address_vector * ) &bcast_av->av; @@ -1294,8 +1350,6 @@ static int arbel_probe ( struct pci_device *pci, av->gid_present = 1; memcpy ( &av->gid, ( ( void * ) bav ) + 16, 16 ); - memcpy ( &ibdev->broadcast_gid, &ib_data.bcast_gid, 16 ); - /* Add IPoIB device */ if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) { DBGC ( arbel, "Arbel %p could not add IPoIB device: %s\n", @@ -1306,6 +1360,7 @@ static int arbel_probe ( struct pci_device *pci, return 0; err_ipoib_probe: + err_get_broadcast_gid: err_get_port_gid: err_query_dev_lim: ib_driver_close ( 0 ); diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 236b2727..32f9d675 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -468,11 +468,19 @@ struct ib_mad_port_info { uint8_t link_speed_enabled__link_speed_active; } __attribute__ (( packed )); +struct ib_mad_pkey_table { + struct ib_mad_hdr mad_hdr; + uint32_t mkey[2]; + uint32_t reserved[8]; + uint16_t pkey[16][2]; +} __attribute__ (( packed )); + union ib_mad { struct ib_mad_hdr mad_hdr; struct ib_mad_data data; struct ib_mad_guid_info guid_info; struct ib_mad_port_info port_info; + struct ib_mad_pkey_table pkey_table; } __attribute__ (( packed )); #endif /* _GPXE_INFINIBAND_H */ From b3d3814c176a068980f8c178e74d36e524fa7572 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 08:36:56 +0100 Subject: [PATCH 53/84] Obtains a response to the get path record! 
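In outline, the query added to ipoib.c below builds a subnet administration PathRecord Get MAD in an I/O buffer and posts it on the new metadata queue pair, addressed to the subnet manager's LID at QPN 1 with the well-known SA queue key.  The following is a condensed sketch only, using the names defined in this patch; the TID bookkeeping and error paths of the real ipoib_get_path_record() are elided.

    static int sketch_path_record_query ( struct ipoib_device *ipoib,
                                          struct ib_gid *dgid ) {
            struct ib_device *ibdev = ipoib->ibdev;
            struct io_buffer *iobuf;
            struct ib_mad_path_record *pr;
            struct ib_address_vector av;

            /* Allocate and zero the request MAD */
            iobuf = alloc_iob ( sizeof ( *pr ) );
            if ( ! iobuf )
                    return -ENOMEM;
            iob_put ( iobuf, sizeof ( *pr ) );
            pr = iobuf->data;
            memset ( pr, 0, sizeof ( *pr ) );

            /* SubnAdmGet(PathRecord), matching on DGID and SGID only */
            pr->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
            pr->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
            pr->mad_hdr.class_version = 2;
            pr->mad_hdr.method = IB_MGMT_METHOD_GET;
            pr->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC );
            pr->sa_hdr.comp_mask[1] =
                    htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID );
            memcpy ( &pr->dgid, dgid, sizeof ( pr->dgid ) );
            memcpy ( &pr->sgid, &ibdev->port_gid, sizeof ( pr->sgid ) );

            /* Address the SA: subnet manager LID, QPN 1, SA queue key */
            memset ( &av, 0, sizeof ( av ) );
            av.dlid = ibdev->sm_lid;
            av.dest_qp = IB_SA_QPN;
            av.qkey = IB_SA_QKEY;

            /* The reply arrives on the metadata completion queue */
            return ib_post_send ( ibdev, ipoib->meta.qp, &av, iobuf );
    }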
--- src/drivers/net/ipoib.c | 181 ++++++++++++++++++++++++++-- src/drivers/net/mlx_ipoib/ib_mad.c | 5 +- src/drivers/net/mlx_ipoib/ib_mad.h | 2 +- src/drivers/net/mlx_ipoib/mt25218.c | 42 ++++++- src/include/gpxe/infiniband.h | 63 +++++++++- 5 files changed, 276 insertions(+), 17 deletions(-) diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c index 2d351e4b..a848b711 100644 --- a/src/drivers/net/ipoib.c +++ b/src/drivers/net/ipoib.c @@ -44,15 +44,24 @@ extern struct ib_address_vector hack_ipoib_bcast_av; /** IPoIB MTU */ #define IPOIB_MTU 2048 -/** Number of IPoIB send work queue entries */ +/** Number of IPoIB data send work queue entries */ #define IPOIB_DATA_NUM_SEND_WQES 4 -/** Number of IPoIB receive work queue entries */ -#define IPOIB_DATA_NUM_RECV_WQES 8 +/** Number of IPoIB data receive work queue entries */ +#define IPOIB_DATA_NUM_RECV_WQES 4 -/** Number of IPoIB completion entries */ +/** Number of IPoIB data completion entries */ #define IPOIB_DATA_NUM_CQES 8 +/** Number of IPoIB metadata send work queue entries */ +#define IPOIB_META_NUM_SEND_WQES 4 + +/** Number of IPoIB metadata receive work queue entries */ +#define IPOIB_META_NUM_RECV_WQES 4 + +/** Number of IPoIB metadata completion entries */ +#define IPOIB_META_NUM_CQES 8 + /** An IPoIB queue set */ struct ipoib_queue_set { /** Completion queue */ @@ -84,10 +93,15 @@ struct ipoib_device { **************************************************************************** */ +/** Broadcast QPN used in IPoIB MAC addresses + * + * This is a guaranteed invalid real QPN + */ +#define IPOIB_BROADCAST_QPN 0xffffffffUL + /** Broadcast IPoIB address */ static struct ipoib_mac ipoib_broadcast = { - .gid = { { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } }, + .qpn = ntohl ( IPOIB_BROADCAST_QPN ), }; /** @@ -244,6 +258,73 @@ static int ipoib_create_qset ( struct ipoib_device *ipoib, return rc; } +/** + * Transmit path record request + * + * @v ipoib IPoIB device + * @v gid Destination GID + * @ret rc Return status code + */ +static int ipoib_get_path_record ( struct ipoib_device *ipoib, + struct ib_gid *gid ) { + struct ib_device *ibdev = ipoib->ibdev; + struct io_buffer *iobuf; + struct ib_mad_path_record *path_record; + struct ib_address_vector av; + static uint32_t tid = 0; + int rc; + + DBG ( "get_path_record():\n" ); + int get_path_record(struct ib_gid *dgid, uint16_t *dlid_p, + uint8_t *sl_p, uint8_t *rate_p); + uint16_t tmp_dlid; + uint8_t tmp_sl; + uint8_t tmp_rate; + get_path_record ( gid, &tmp_dlid, &tmp_sl, &tmp_rate ); + + DBG ( "ipoib_get_path_record():\n" ); + + /* Allocate I/O buffer */ + iobuf = alloc_iob ( sizeof ( *path_record ) ); + if ( ! 
iobuf ) + return -ENOMEM; + iob_put ( iobuf, sizeof ( *path_record ) ); + path_record = iobuf->data; + memset ( path_record, 0, sizeof ( *path_record ) ); + + /* Construct path record request */ + path_record->mad_hdr.base_version = IB_MGMT_BASE_VERSION; + path_record->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; + path_record->mad_hdr.class_version = 2; + path_record->mad_hdr.method = IB_MGMT_METHOD_GET; + path_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC ); + path_record->mad_hdr.tid = tid++; + path_record->sa_hdr.comp_mask[1] = + htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID ); + memcpy ( &path_record->dgid, gid, sizeof ( path_record->dgid ) ); + memcpy ( &path_record->sgid, &ibdev->port_gid, + sizeof ( path_record->sgid ) ); + + DBG_HD ( path_record, sizeof ( *path_record ) ); + + /* Construct address vector */ + memset ( &av, 0, sizeof ( av ) ); + av.dlid = ibdev->sm_lid; + av.dest_qp = IB_SA_QPN; + av.qkey = IB_SA_QKEY; + + /* Post send request */ + if ( ( rc = ib_post_send ( ibdev, ipoib->meta.qp, &av, + iobuf ) ) != 0 ) { + DBGC ( ipoib, "IPoIB %p could not send get path record: %s\n", + ipoib, strerror ( rc ) ); + free_iob ( iobuf ); + return rc; + } + + return 0; +} + /** * Transmit packet via IPoIB network device * @@ -256,19 +337,29 @@ static int ipoib_transmit ( struct net_device *netdev, struct ipoib_device *ipoib = netdev->priv; struct ib_device *ibdev = ipoib->ibdev; struct ipoib_pseudo_hdr *ipoib_pshdr = iobuf->data; + int rc; if ( iob_len ( iobuf ) < sizeof ( *ipoib_pshdr ) ) { DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib ); return -EINVAL; } + DBG ( "TX pseudo-header:\n" ); + DBG_HD ( ipoib_pshdr, sizeof ( *ipoib_pshdr ) ); + if ( ipoib_pshdr->peer.qpn != htonl ( IPOIB_BROADCAST_QPN ) ) { + DBG ( "Get path record\n" ); + rc = ipoib_get_path_record ( ipoib, &ipoib_pshdr->peer.gid ); + free_iob ( iobuf ); + return 0; + } + iob_pull ( iobuf, ( sizeof ( *ipoib_pshdr ) ) ); return ib_post_send ( ibdev, ipoib->data.qp, &hack_ipoib_bcast_av, iobuf ); } /** - * Handle IPoIB send completion + * Handle IPoIB data send completion * * @v ibdev Infiniband device * @v qp Queue pair @@ -286,7 +377,7 @@ static void ipoib_data_complete_send ( struct ib_device *ibdev __unused, } /** - * Handle IPoIB receive completion + * Handle IPoIB data receive completion * * @v ibdev Infiniband device * @v qp Queue pair @@ -315,6 +406,61 @@ static void ipoib_data_complete_recv ( struct ib_device *ibdev __unused, ipoib->data.recv_fill--; } +/** + * Handle IPoIB metadata send completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v completion Completion + * @v iobuf I/O buffer + */ +static void ipoib_meta_complete_send ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->owner_priv; + struct ipoib_device *ipoib = netdev->priv; + + DBG ( "Woohoo! 
METADATA TX completion\n" ); + + + if ( completion->syndrome ) { + DBGC ( ipoib, "IPoIB %p metadata TX completion error %x\n", + ipoib, completion->syndrome ); + } + free_iob ( iobuf ); +} + +/** + * Handle IPoIB metadata receive completion + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v completion Completion + * @v iobuf I/O buffer + */ +static void ipoib_meta_complete_recv ( struct ib_device *ibdev __unused, + struct ib_queue_pair *qp, + struct ib_completion *completion, + struct io_buffer *iobuf ) { + struct net_device *netdev = qp->owner_priv; + struct ipoib_device *ipoib = netdev->priv; + + DBG ( "***************** META TX!!!!!! ********\n" ); + + if ( completion->syndrome ) { + DBGC ( ipoib, "IPoIB %p metadata RX completion error %x\n", + ipoib, completion->syndrome ); + } else { + iob_put ( iobuf, completion->len ); + DBG ( "Metadata RX:\n" ); + DBG_HD ( iobuf->data, iob_len ( iobuf ) ); + } + + ipoib->meta.recv_fill--; + free_iob ( iobuf ); +} + /** * Refill IPoIB receive ring * @@ -349,6 +495,9 @@ static void ipoib_poll ( struct net_device *netdev ) { ib_poll_cq ( ibdev, ipoib->data.cq, ipoib_data_complete_send, ipoib_data_complete_recv ); + ib_poll_cq ( ibdev, ipoib->meta.cq, ipoib_meta_complete_send, + ipoib_meta_complete_recv ); + ipoib_refill_recv ( ipoib, &ipoib->meta ); ipoib_refill_recv ( ipoib, &ipoib->data ); } @@ -382,7 +531,8 @@ static int ipoib_open ( struct net_device *netdev ) { return rc; } - /* Fill receive ring */ + /* Fill receive rings */ + ipoib_refill_recv ( ipoib, &ipoib->meta ); ipoib_refill_recv ( ipoib, &ipoib->data ); return 0; @@ -436,6 +586,17 @@ int ipoib_probe ( struct ib_device *ibdev ) { ipoib->netdev = netdev; ipoib->ibdev = ibdev; + /* Allocate metadata queue set */ + if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->meta, + IPOIB_META_NUM_CQES, + IPOIB_META_NUM_SEND_WQES, + IPOIB_META_NUM_RECV_WQES, + IB_SA_QKEY ) ) != 0 ) { + DBGC ( ipoib, "IPoIB %p could not allocate metadata QP: %s\n", + ipoib, strerror ( rc ) ); + goto err_create_meta_qset; + } + /* Allocate data queue set */ if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data, IPOIB_DATA_NUM_CQES, @@ -461,6 +622,8 @@ int ipoib_probe ( struct ib_device *ibdev ) { err_register_netdev: ipoib_destroy_qset ( ipoib, &ipoib->data ); err_create_data_qset: + ipoib_destroy_qset ( ipoib, &ipoib->meta ); + err_create_meta_qset: netdev_nullify ( netdev ); netdev_put ( netdev ); return rc; diff --git a/src/drivers/net/mlx_ipoib/ib_mad.c b/src/drivers/net/mlx_ipoib/ib_mad.c index 73b49f20..4da4677b 100644 --- a/src/drivers/net/mlx_ipoib/ib_mad.c +++ b/src/drivers/net/mlx_ipoib/ib_mad.c @@ -264,7 +264,7 @@ static int join_mc_group(__u32 * qkey_p, __u16 * mlid_p, __u8 join) return is_good ? 
0 : -1; } -static int get_path_record(union ib_gid_u *dgid, __u16 * dlid_p, u8 * sl_p, +int get_path_record(union ib_gid_u *dgid, __u16 * dlid_p, u8 * sl_p, u8 * rate_p) { struct path_record_mad_st *mad, *rcv_mad; @@ -321,6 +321,9 @@ static int get_path_record(union ib_gid_u *dgid, __u16 * dlid_p, u8 * sl_p, cpu_to_be_buf(mad, sizeof *mad); memcpy(mad->path_record.sgid.raw, ib_data.port_gid.raw, 16); + DBG ( "data:\n" ); + DBG_HD ( mad, sizeof ( *mad ) ); + rc = post_send_req(qp, snd_wqe, 1); if (rc) { eprintf(""); diff --git a/src/drivers/net/mlx_ipoib/ib_mad.h b/src/drivers/net/mlx_ipoib/ib_mad.h index 5ffb5404..51b90d21 100644 --- a/src/drivers/net/mlx_ipoib/ib_mad.h +++ b/src/drivers/net/mlx_ipoib/ib_mad.h @@ -104,7 +104,7 @@ union mad_u { struct ib_mad_st mad; } __attribute__ ((packed)); -static int get_path_record(union ib_gid_u *dgid, __u16 * dlid_p, __u8 * sl_p, +int get_path_record(union ib_gid_u *dgid, __u16 * dlid_p, __u8 * sl_p, __u8 * rate_p); #endif /* __ib_mad_h__ */ diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 08207819..fb98d543 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -174,8 +174,8 @@ static int arbel_cmd ( struct arbel *arbel, unsigned long command, size_t dump_len = in_len; if ( dump_len > 256 ) dump_len = 256; - DBG ( "Input:\n" ); - DBG_HD ( in, dump_len ); + // DBG ( "Input:\n" ); + // DBG_HD ( in, dump_len ); } /* Issue command */ @@ -212,8 +212,8 @@ static int arbel_cmd ( struct arbel *arbel, unsigned long command, size_t dump_len = out_len; if ( dump_len > 256 ) dump_len = 256; - DBG ( "Output:\n" ); - DBG_HD ( out, dump_len ); + // DBG ( "Output:\n" ); + // DBG_HD ( out, dump_len ); } return 0; @@ -749,7 +749,7 @@ static void arbel_ring_doorbell ( struct arbel *arbel, /** GID used for GID-less send work queue entries */ static const struct ib_gid arbel_no_gid = { - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0 } }; /** @@ -805,6 +805,14 @@ static int arbel_post_send ( struct ib_device *ibdev, MLX_FILL_1 ( &wqe->ud, 3, ud_address_vector.sl, av->sl ); gid = ( av->gid_present ? &av->gid : &arbel_no_gid ); memcpy ( &wqe->ud.u.dwords[4], gid, sizeof ( *gid ) ); + + if ( ! 
av->gid_present ) { + DBG ( "no_gid:\n" ); + DBG_HD ( &arbel_no_gid, sizeof ( arbel_no_gid ) ); + DBG ( "gid:\n" ); + DBG_HD ( &wqe->ud.u.dwords[4], 16 ); + } + MLX_FILL_1 ( &wqe->ud, 8, destination_qp, av->dest_qp ); MLX_FILL_1 ( &wqe->ud, 9, q_key, av->qkey ); MLX_FILL_1 ( &wqe->data[0], 0, byte_count, iob_len ( iobuf ) ); @@ -821,6 +829,11 @@ static int arbel_post_send ( struct ib_device *ibdev, f, 1, always1, 1 ); + + DBG ( "arbel_post_send()\n" ); + DBG_HD ( wqe, sizeof ( *wqe ) ); + + /* Update doorbell record */ barrier(); qp_db_rec = &arbel->db_rec[arbel_send_wq->doorbell_idx].qp; @@ -1248,6 +1261,17 @@ static int arbel_get_port_gid ( struct arbel *arbel, return 0; } +static int arbel_get_sm_lid ( struct arbel *arbel, + unsigned long *sm_lid ) { + struct ib_mad_port_info port_info; + int rc; + + if ( ( rc = arbel_get_port_info ( arbel, &port_info ) ) != 0 ) + return rc; + *sm_lid = ntohs ( port_info.mastersm_lid ); + return 0; +} + static int arbel_get_broadcast_gid ( struct arbel *arbel, struct ib_gid *broadcast_gid ) { static const struct ib_gid ipv4_broadcast_gid = { @@ -1323,6 +1347,13 @@ static int arbel_probe ( struct pci_device *pci, arbel->limits.reserved_qps = ( 1 << MLX_GET ( &dev_lim, log2_rsvd_qps ) ); + /* Get subnet manager LID */ + if ( ( rc = arbel_get_sm_lid ( arbel, &ibdev->sm_lid ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not determine subnet manager " + "LID: %s\n", arbel, strerror ( rc ) ); + goto err_get_sm_lid; + } + /* Get port GID */ if ( ( rc = arbel_get_port_gid ( arbel, &ibdev->port_gid ) ) != 0 ) { DBGC ( arbel, "Arbel %p could not determine port GID: %s\n", @@ -1362,6 +1393,7 @@ static int arbel_probe ( struct pci_device *pci, err_ipoib_probe: err_get_broadcast_gid: err_get_port_gid: + err_get_sm_lid: err_query_dev_lim: ib_driver_close ( 0 ); err_ib_driver_init: diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 32f9d675..2d1d9433 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -10,6 +10,12 @@ #include #include +/** Subnet administrator QPN */ +#define IB_SA_QPN 1 + +/** Subnet administrator queue key */ +#define IB_SA_QKEY 0x80010000UL + /** An Infiniband Global Identifier */ struct ib_gid { uint8_t bytes[16]; @@ -250,7 +256,9 @@ struct ib_device { /** Port GID */ struct ib_gid port_gid; /** Broadcast GID */ - struct ib_gid broadcast_gid; + struct ib_gid broadcast_gid; + /** Subnet manager LID */ + unsigned long sm_lid; /** Underlying device */ struct device *dev; /** Infiniband operations */ @@ -422,6 +430,31 @@ static inline void * ib_get_ownerdata ( struct ib_device *ibdev ) { #define IB_SMP_ATTR_LED_INFO 0x0031 #define IB_SMP_ATTR_VENDOR_MASK 0xFF00 +#define IB_SA_ATTR_MC_MEMBER_REC 0x38 +#define IB_SA_ATTR_PATH_REC 0x35 + +#define IB_SA_MCMEMBER_REC_MGID (1<<0) +#define IB_SA_MCMEMBER_REC_PORT_GID (1<<1) +#define IB_SA_MCMEMBER_REC_QKEY (1<<2) +#define IB_SA_MCMEMBER_REC_MLID (1<<3) +#define IB_SA_MCMEMBER_REC_MTU_SELECTOR (1<<4) +#define IB_SA_MCMEMBER_REC_MTU (1<<5) +#define IB_SA_MCMEMBER_REC_TRAFFIC_CLASS (1<<6) +#define IB_SA_MCMEMBER_REC_PKEY (1<<7) +#define IB_SA_MCMEMBER_REC_RATE_SELECTOR (1<<8) +#define IB_SA_MCMEMBER_REC_RATE (1<<9) +#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR (1<<10) +#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME (1<<11) +#define IB_SA_MCMEMBER_REC_SL (1<<12) +#define IB_SA_MCMEMBER_REC_FLOW_LABEL (1<<13) +#define IB_SA_MCMEMBER_REC_HOP_LIMIT (1<<14) +#define IB_SA_MCMEMBER_REC_SCOPE (1<<15) +#define IB_SA_MCMEMBER_REC_JOIN_STATE (1<<16) +#define 
IB_SA_MCMEMBER_REC_PROXY_JOIN (1<<17) + +#define IB_SA_PATH_REC_DGID (1<<2) +#define IB_SA_PATH_REC_SGID (1<<3) + struct ib_mad_hdr { uint8_t base_version; uint8_t mgmt_class; @@ -435,6 +468,17 @@ struct ib_mad_hdr { uint32_t attr_mod; } __attribute__ (( packed )); +struct ib_sa_hdr { + uint32_t sm_key[2]; + uint16_t reserved; + uint16_t attrib_offset; + uint32_t comp_mask[2]; +} __attribute__ (( packed )); + +struct ib_rmpp_hdr { + uint32_t raw[3]; +} __attribute__ (( packed )); + struct ib_mad_data { struct ib_mad_hdr mad_hdr; uint8_t data[232]; @@ -475,12 +519,29 @@ struct ib_mad_pkey_table { uint16_t pkey[16][2]; } __attribute__ (( packed )); +struct ib_mad_path_record { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + struct ib_sa_hdr sa_hdr; + uint32_t reserved0[2]; + struct ib_gid dgid; + struct ib_gid sgid; + uint16_t dlid; + uint16_t slid; + uint32_t hop_limit__flow_label__raw_traffic; + uint32_t pkey__numb_path__reversible__tclass; + uint32_t rate__rate_selector__mtu__mtu_selector__sl__reserved; + uint32_t preference__packet_lifetime__packet_lifetime_selector; + uint32_t reserved1[35]; +} __attribute__ (( packed )); + union ib_mad { struct ib_mad_hdr mad_hdr; struct ib_mad_data data; struct ib_mad_guid_info guid_info; struct ib_mad_port_info port_info; struct ib_mad_pkey_table pkey_table; + struct ib_mad_path_record path_record; } __attribute__ (( packed )); #endif /* _GPXE_INFINIBAND_H */ From 22090901202332b5ce427436272b780d2ddfb324 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 08:41:06 +0100 Subject: [PATCH 54/84] Dead code disabling --- src/drivers/net/ipoib.c | 9 ++++----- src/drivers/net/mlx_ipoib/mt25218.c | 13 ------------- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c index a848b711..8432b2a6 100644 --- a/src/drivers/net/ipoib.c +++ b/src/drivers/net/ipoib.c @@ -274,6 +274,7 @@ static int ipoib_get_path_record ( struct ipoib_device *ipoib, static uint32_t tid = 0; int rc; +#if 0 DBG ( "get_path_record():\n" ); int get_path_record(struct ib_gid *dgid, uint16_t *dlid_p, uint8_t *sl_p, uint8_t *rate_p); @@ -283,6 +284,7 @@ static int ipoib_get_path_record ( struct ipoib_device *ipoib, get_path_record ( gid, &tmp_dlid, &tmp_sl, &tmp_rate ); DBG ( "ipoib_get_path_record():\n" ); +#endif /* Allocate I/O buffer */ iobuf = alloc_iob ( sizeof ( *path_record ) ); @@ -305,7 +307,7 @@ static int ipoib_get_path_record ( struct ipoib_device *ipoib, memcpy ( &path_record->sgid, &ibdev->port_gid, sizeof ( path_record->sgid ) ); - DBG_HD ( path_record, sizeof ( *path_record ) ); + // DBG_HD ( path_record, sizeof ( *path_record ) ); /* Construct address vector */ memset ( &av, 0, sizeof ( av ) ); @@ -421,9 +423,6 @@ static void ipoib_meta_complete_send ( struct ib_device *ibdev __unused, struct net_device *netdev = qp->owner_priv; struct ipoib_device *ipoib = netdev->priv; - DBG ( "Woohoo! METADATA TX completion\n" ); - - if ( completion->syndrome ) { DBGC ( ipoib, "IPoIB %p metadata TX completion error %x\n", ipoib, completion->syndrome ); @@ -446,7 +445,7 @@ static void ipoib_meta_complete_recv ( struct ib_device *ibdev __unused, struct net_device *netdev = qp->owner_priv; struct ipoib_device *ipoib = netdev->priv; - DBG ( "***************** META TX!!!!!! ********\n" ); + DBG ( "***************** META RX!!!!!! 
********\n" ); if ( completion->syndrome ) { DBGC ( ipoib, "IPoIB %p metadata RX completion error %x\n", diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index fb98d543..6b5bd066 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -805,14 +805,6 @@ static int arbel_post_send ( struct ib_device *ibdev, MLX_FILL_1 ( &wqe->ud, 3, ud_address_vector.sl, av->sl ); gid = ( av->gid_present ? &av->gid : &arbel_no_gid ); memcpy ( &wqe->ud.u.dwords[4], gid, sizeof ( *gid ) ); - - if ( ! av->gid_present ) { - DBG ( "no_gid:\n" ); - DBG_HD ( &arbel_no_gid, sizeof ( arbel_no_gid ) ); - DBG ( "gid:\n" ); - DBG_HD ( &wqe->ud.u.dwords[4], 16 ); - } - MLX_FILL_1 ( &wqe->ud, 8, destination_qp, av->dest_qp ); MLX_FILL_1 ( &wqe->ud, 9, q_key, av->qkey ); MLX_FILL_1 ( &wqe->data[0], 0, byte_count, iob_len ( iobuf ) ); @@ -829,11 +821,6 @@ static int arbel_post_send ( struct ib_device *ibdev, f, 1, always1, 1 ); - - DBG ( "arbel_post_send()\n" ); - DBG_HD ( wqe, sizeof ( *wqe ) ); - - /* Update doorbell record */ barrier(); qp_db_rec = &arbel->db_rec[arbel_send_wq->doorbell_idx].qp; From ab191e45dbfe13c5bd2c7f6415c520e8d60ad850 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 10:39:30 +0100 Subject: [PATCH 55/84] Unicasts seem to be working. :) --- src/drivers/net/ipoib.c | 139 ++++++++++++++++++++++------ src/drivers/net/mlx_ipoib/mt25218.c | 14 +-- src/include/gpxe/infiniband.h | 16 +++- 3 files changed, 128 insertions(+), 41 deletions(-) diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c index 8432b2a6..f3b821c4 100644 --- a/src/drivers/net/ipoib.c +++ b/src/drivers/net/ipoib.c @@ -86,6 +86,32 @@ struct ipoib_device { struct ipoib_queue_set meta; }; +/** + * IPoIB path cache entry + * + * This serves a similar role to the ARP cache for Ethernet. (ARP + * *is* used on IPoIB; we have two caches to maintain.) 
+ */ +struct ipoib_cached_path { + /** Destination GID */ + struct ib_gid gid; + /** Destination LID */ + unsigned int dlid; + /** Service level */ + unsigned int sl; + /** Rate */ + unsigned int rate; +}; + +/** Number of IPoIB path cache entries */ +#define IPOIB_NUM_CACHED_PATHS 2 + +/** IPoIB path cache */ +static struct ipoib_cached_path ipoib_path_cache[IPOIB_NUM_CACHED_PATHS]; + +/** Oldest IPoIB path cache entry index */ +static unsigned int ipoib_path_cache_idx = 0; + /**************************************************************************** * * IPoIB link layer @@ -165,15 +191,15 @@ static int ipoib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) { * @ret string Link-layer address in human-readable format */ const char * ipoib_ntoa ( const void *ll_addr ) { - static char buf[61]; - const uint8_t *ipoib_addr = ll_addr; - unsigned int i; - char *p = buf; + static char buf[45]; + const struct ipoib_mac *mac = ll_addr; - for ( i = 0 ; i < IPOIB_ALEN ; i++ ) { - p += sprintf ( p, ":%02x", ipoib_addr[i] ); - } - return ( buf + 1 ); + snprintf ( buf, sizeof ( buf ), "%08lx:%08lx:%08lx:%08lx:%08lx", + htonl ( mac->qpn ), htonl ( mac->gid.u.dwords[0] ), + htonl ( mac->gid.u.dwords[1] ), + htonl ( mac->gid.u.dwords[2] ), + htonl ( mac->gid.u.dwords[3] ) ); + return buf; } /** IPoIB protocol */ @@ -258,6 +284,28 @@ static int ipoib_create_qset ( struct ipoib_device *ipoib, return rc; } +/** + * Find path cache entry by GID + * + * @v gid GID + * @ret entry Path cache entry, or NULL + */ +static struct ipoib_cached_path * +ipoib_find_cached_path ( struct ib_gid *gid ) { + struct ipoib_cached_path *path; + unsigned int i; + + for ( i = 0 ; i < IPOIB_NUM_CACHED_PATHS ; i++ ) { + path = &ipoib_path_cache[i]; + if ( memcmp ( &path->gid, gid, sizeof ( *gid ) ) == 0 ) + return path; + } + DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx cache miss\n", + htonl ( gid->u.dwords[0] ), htonl ( gid->u.dwords[1] ), + htonl ( gid->u.dwords[2] ), htonl ( gid->u.dwords[3] ) ); + return NULL; +} + /** * Transmit path record request * @@ -274,17 +322,15 @@ static int ipoib_get_path_record ( struct ipoib_device *ipoib, static uint32_t tid = 0; int rc; -#if 0 - DBG ( "get_path_record():\n" ); int get_path_record(struct ib_gid *dgid, uint16_t *dlid_p, uint8_t *sl_p, uint8_t *rate_p); uint16_t tmp_dlid; uint8_t tmp_sl; uint8_t tmp_rate; get_path_record ( gid, &tmp_dlid, &tmp_sl, &tmp_rate ); + DBG ( "get_path_record() gives dlid = %04x, sl = %02x, rate = %02x\n", + tmp_dlid, tmp_sl, tmp_rate ); - DBG ( "ipoib_get_path_record():\n" ); -#endif /* Allocate I/O buffer */ iobuf = alloc_iob ( sizeof ( *path_record ) ); @@ -307,13 +353,11 @@ static int ipoib_get_path_record ( struct ipoib_device *ipoib, memcpy ( &path_record->sgid, &ibdev->port_gid, sizeof ( path_record->sgid ) ); - // DBG_HD ( path_record, sizeof ( *path_record ) ); - /* Construct address vector */ memset ( &av, 0, sizeof ( av ) ); av.dlid = ibdev->sm_lid; av.dest_qp = IB_SA_QPN; - av.qkey = IB_SA_QKEY; + av.qkey = IB_GLOBAL_QKEY; /* Post send request */ if ( ( rc = ib_post_send ( ibdev, ipoib->meta.qp, &av, @@ -339,6 +383,8 @@ static int ipoib_transmit ( struct net_device *netdev, struct ipoib_device *ipoib = netdev->priv; struct ib_device *ibdev = ipoib->ibdev; struct ipoib_pseudo_hdr *ipoib_pshdr = iobuf->data; + struct ib_address_vector av; + struct ipoib_cached_path *path; int rc; if ( iob_len ( iobuf ) < sizeof ( *ipoib_pshdr ) ) { @@ -346,18 +392,32 @@ static int ipoib_transmit ( struct net_device *netdev, return -EINVAL; } - DBG ( "TX 
pseudo-header:\n" ); - DBG_HD ( ipoib_pshdr, sizeof ( *ipoib_pshdr ) ); - if ( ipoib_pshdr->peer.qpn != htonl ( IPOIB_BROADCAST_QPN ) ) { - DBG ( "Get path record\n" ); - rc = ipoib_get_path_record ( ipoib, &ipoib_pshdr->peer.gid ); - free_iob ( iobuf ); - return 0; + /* Construct address vector */ + memset ( &av, 0, sizeof ( av ) ); + if ( ipoib_pshdr->peer.qpn == htonl ( IPOIB_BROADCAST_QPN ) ) { + /* Broadcast address */ + memcpy ( &av, &hack_ipoib_bcast_av, sizeof ( av ) ); + } else { + /* Unicast - look in path cache */ + path = ipoib_find_cached_path ( &ipoib_pshdr->peer.gid ); + if ( ! path ) { + /* No path entry - get path record */ + rc = ipoib_get_path_record ( ipoib, + &ipoib_pshdr->peer.gid ); + free_iob ( iobuf ); + return rc; + } + av.dest_qp = ntohl ( ipoib_pshdr->peer.qpn ); + av.qkey = IB_GLOBAL_QKEY; + av.dlid = path->dlid; + av.rate = path->rate; + av.sl = path->sl; + av.gid_present = 1; + memcpy ( &av.gid, &ipoib_pshdr->peer.gid, sizeof ( av.gid ) ); } iob_pull ( iobuf, ( sizeof ( *ipoib_pshdr ) ) ); - return ib_post_send ( ibdev, ipoib->data.qp, - &hack_ipoib_bcast_av, iobuf ); + return ib_post_send ( ibdev, ipoib->data.qp, &av, iobuf ); } /** @@ -392,14 +452,13 @@ static void ipoib_data_complete_recv ( struct ib_device *ibdev __unused, struct io_buffer *iobuf ) { struct net_device *netdev = qp->owner_priv; struct ipoib_device *ipoib = netdev->priv; - struct ib_global_route_header *grh = iobuf->data; struct ipoib_pseudo_hdr *ipoib_pshdr; if ( completion->syndrome ) { netdev_rx_err ( netdev, iobuf, -EIO ); } else { iob_put ( iobuf, completion->len ); - iob_pull ( iobuf, ( sizeof ( *grh ) - + iob_pull ( iobuf, ( sizeof ( struct ib_global_route_header ) - sizeof ( *ipoib_pshdr ) ) ); /* FIXME: fill in a MAC address for the sake of AoE! */ netdev_rx ( netdev, iobuf ); @@ -444,16 +503,38 @@ static void ipoib_meta_complete_recv ( struct ib_device *ibdev __unused, struct io_buffer *iobuf ) { struct net_device *netdev = qp->owner_priv; struct ipoib_device *ipoib = netdev->priv; - - DBG ( "***************** META RX!!!!!! 
********\n" ); + struct ib_mad_path_record *path_record; + struct ipoib_cached_path *path; if ( completion->syndrome ) { DBGC ( ipoib, "IPoIB %p metadata RX completion error %x\n", ipoib, completion->syndrome ); } else { + /* Update path cache */ iob_put ( iobuf, completion->len ); + iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) ); + DBG ( "Metadata RX:\n" ); DBG_HD ( iobuf->data, iob_len ( iobuf ) ); + + path_record = iobuf->data; + path = &ipoib_path_cache[ipoib_path_cache_idx]; + memcpy ( &path->gid, &path_record->dgid, + sizeof ( path->gid ) ); + path->dlid = ntohs ( path_record->dlid ); + path->sl = ( path_record->reserved__sl & 0x0f ); + path->rate = ( path_record->rate_selector__rate & 0x3f ); + DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx dlid %x sl %x rate %x\n", + htonl ( path->gid.u.dwords[0] ), + htonl ( path->gid.u.dwords[1] ), + htonl ( path->gid.u.dwords[2] ), + htonl ( path->gid.u.dwords[3] ), + path->dlid, path->sl, path->rate ); + + /* Update path cache index */ + ipoib_path_cache_idx++; + if ( ipoib_path_cache_idx == IPOIB_NUM_CACHED_PATHS ) + ipoib_path_cache_idx = 0; } ipoib->meta.recv_fill--; @@ -590,7 +671,7 @@ int ipoib_probe ( struct ib_device *ibdev ) { IPOIB_META_NUM_CQES, IPOIB_META_NUM_SEND_WQES, IPOIB_META_NUM_RECV_WQES, - IB_SA_QKEY ) ) != 0 ) { + IB_GLOBAL_QKEY ) ) != 0 ) { DBGC ( ipoib, "IPoIB %p could not allocate metadata QP: %s\n", ipoib, strerror ( rc ) ); goto err_create_meta_qset; diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 6b5bd066..0fd832e7 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -749,7 +749,7 @@ static void arbel_ring_doorbell ( struct arbel *arbel, /** GID used for GID-less send work queue entries */ static const struct ib_gid arbel_no_gid = { - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0 } + { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0 } } }; /** @@ -1238,12 +1238,12 @@ static int arbel_get_port_gid ( struct arbel *arbel, /* Port info gives us the first half of the port GID */ if ( ( rc = arbel_get_port_info ( arbel, &u.port_info ) ) != 0 ) return rc; - memcpy ( &port_gid->bytes[0], u.port_info.gid_prefix, 8 ); - + memcpy ( &port_gid->u.bytes[0], u.port_info.gid_prefix, 8 ); + /* GUID info gives us the second half of the port GID */ if ( ( rc = arbel_get_guid_info ( arbel, &u.guid_info ) ) != 0 ) return rc; - memcpy ( &port_gid->bytes[8], u.guid_info.gid_local, 8 ); + memcpy ( &port_gid->u.bytes[8], u.guid_info.gid_local, 8 ); return 0; } @@ -1262,8 +1262,8 @@ static int arbel_get_sm_lid ( struct arbel *arbel, static int arbel_get_broadcast_gid ( struct arbel *arbel, struct ib_gid *broadcast_gid ) { static const struct ib_gid ipv4_broadcast_gid = { - { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } + { { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } } }; struct ib_mad_pkey_table pkey_table; int rc; @@ -1275,7 +1275,7 @@ static int arbel_get_broadcast_gid ( struct arbel *arbel, /* Add partition key */ if ( ( rc = arbel_get_pkey_table ( arbel, &pkey_table ) ) != 0 ) return rc; - memcpy ( &broadcast_gid->bytes[4], &pkey_table.pkey[0][0], + memcpy ( &broadcast_gid->u.bytes[4], &pkey_table.pkey[0][0], sizeof ( pkey_table.pkey[0][0] ) ); return 0; diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 2d1d9433..845c4c22 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -14,11 
+14,14 @@ #define IB_SA_QPN 1 /** Subnet administrator queue key */ -#define IB_SA_QKEY 0x80010000UL +#define IB_GLOBAL_QKEY 0x80010000UL /** An Infiniband Global Identifier */ struct ib_gid { - uint8_t bytes[16]; + union { + uint8_t bytes[16]; + uint32_t dwords[4]; + } u; }; /** An Infiniband Global Route Header */ @@ -136,7 +139,7 @@ struct ib_address_vector { /** Destination Queue Pair */ unsigned int dest_qp; /** Queue key */ - unsigned int qkey; + unsigned long qkey; /** Destination Local ID */ unsigned int dlid; /** Rate */ @@ -530,9 +533,12 @@ struct ib_mad_path_record { uint16_t slid; uint32_t hop_limit__flow_label__raw_traffic; uint32_t pkey__numb_path__reversible__tclass; - uint32_t rate__rate_selector__mtu__mtu_selector__sl__reserved; + uint8_t reserved1; + uint8_t reserved__sl; + uint8_t mtu_selector__mtu; + uint8_t rate_selector__rate; uint32_t preference__packet_lifetime__packet_lifetime_selector; - uint32_t reserved1[35]; + uint32_t reserved2[35]; } __attribute__ (( packed )); union ib_mad { From 32a7bbb1e3172f08d86003905fb0689bacb68245 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 10:56:21 +0100 Subject: [PATCH 56/84] Cleaned up some debug messages. --- src/drivers/net/ipoib.c | 17 +++------------ src/drivers/net/mlx_ipoib/mt25218.c | 32 +++++++++++------------------ 2 files changed, 15 insertions(+), 34 deletions(-) diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c index f3b821c4..81011405 100644 --- a/src/drivers/net/ipoib.c +++ b/src/drivers/net/ipoib.c @@ -322,16 +322,6 @@ static int ipoib_get_path_record ( struct ipoib_device *ipoib, static uint32_t tid = 0; int rc; - int get_path_record(struct ib_gid *dgid, uint16_t *dlid_p, - uint8_t *sl_p, uint8_t *rate_p); - uint16_t tmp_dlid; - uint8_t tmp_sl; - uint8_t tmp_rate; - get_path_record ( gid, &tmp_dlid, &tmp_sl, &tmp_rate ); - DBG ( "get_path_record() gives dlid = %04x, sl = %02x, rate = %02x\n", - tmp_dlid, tmp_sl, tmp_rate ); - - /* Allocate I/O buffer */ iobuf = alloc_iob ( sizeof ( *path_record ) ); if ( ! iobuf ) @@ -513,10 +503,6 @@ static void ipoib_meta_complete_recv ( struct ib_device *ibdev __unused, /* Update path cache */ iob_put ( iobuf, completion->len ); iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) ); - - DBG ( "Metadata RX:\n" ); - DBG_HD ( iobuf->data, iob_len ( iobuf ) ); - path_record = iobuf->data; path = &ipoib_path_cache[ipoib_path_cache_idx]; memcpy ( &path->gid, &path_record->dgid, @@ -716,8 +702,11 @@ int ipoib_probe ( struct ib_device *ibdev ) { */ void ipoib_remove ( struct ib_device *ibdev ) { struct net_device *netdev = ib_get_ownerdata ( ibdev ); + struct ipoib_device *ipoib = netdev->priv; unregister_netdev ( netdev ); + ipoib_destroy_qset ( ipoib, &ipoib->data ); + ipoib_destroy_qset ( ipoib, &ipoib->meta ); netdev_nullify ( netdev ); netdev_put ( netdev ); } diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 0fd832e7..5f3c3c4d 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -138,10 +138,10 @@ static int arbel_cmd ( struct arbel *arbel, unsigned long command, unsigned int i; int rc; - DBGC ( arbel, "Arbel %p command %02x in %zx%s out %zx%s\n", - arbel, opcode, in_len, - ( ( command & ARBEL_HCR_IN_MBOX ) ? "(mbox)" : "" ), out_len, - ( ( command & ARBEL_HCR_OUT_MBOX ) ? "(mbox)" : "" ) ); + DBGC2 ( arbel, "Arbel %p command %02x in %zx%s out %zx%s\n", + arbel, opcode, in_len, + ( ( command & ARBEL_HCR_IN_MBOX ) ? 
"(mbox)" : "" ), out_len, + ( ( command & ARBEL_HCR_OUT_MBOX ) ? "(mbox)" : "" ) ); /* Check that HCR is free */ if ( ( rc = arbel_cmd_wait ( arbel, &hcr ) ) != 0 ) { @@ -168,14 +168,10 @@ static int arbel_cmd ( struct arbel *arbel, unsigned long command, opcode, opcode, opcode_modifier, op_mod, go, 1 ); - - DBG_HD ( &hcr, sizeof ( hcr ) ); + DBGC2_HD ( arbel, &hcr, sizeof ( hcr ) ); if ( in_len ) { - size_t dump_len = in_len; - if ( dump_len > 256 ) - dump_len = 256; - // DBG ( "Input:\n" ); - // DBG_HD ( in, dump_len ); + DBGC2 ( arbel, "Input:\n" ); + DBGC2_HD ( arbel, in, ( ( in_len < 256 ) ? in_len : 256 ) ); } /* Issue command */ @@ -207,13 +203,9 @@ static int arbel_cmd ( struct arbel *arbel, unsigned long command, hcr.u.dwords[3] = readl ( arbel->config + ARBEL_HCR_REG ( 3 ) ); hcr.u.dwords[4] = readl ( arbel->config + ARBEL_HCR_REG ( 4 ) ); memcpy ( out, out_buffer, out_len ); - if ( out_len ) { - size_t dump_len = out_len; - if ( dump_len > 256 ) - dump_len = 256; - // DBG ( "Output:\n" ); - // DBG_HD ( out, dump_len ); + DBGC2 ( arbel, "Output:\n" ); + DBGC2_HD ( arbel, out, ( ( out_len < 256 ) ? out_len : 256 ) ); } return 0; @@ -737,9 +729,9 @@ static void arbel_ring_doorbell ( struct arbel *arbel, union arbelprm_doorbell_register *db_reg, unsigned int offset ) { - DBG ( "arbel_ring_doorbell %08lx:%08lx to %lx\n", - db_reg->dword[0], db_reg->dword[1], - virt_to_phys ( arbel->uar + offset ) ); + DBGC2 ( arbel, "Arbel %p ringing doorbell %08lx:%08lx at %lx\n", + arbel, db_reg->dword[0], db_reg->dword[1], + virt_to_phys ( arbel->uar + offset ) ); barrier(); writel ( db_reg->dword[0], ( arbel->uar + offset + 0 ) ); From 2ed1acb9e938cbb5ae331c7d858f193fd40170b4 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 11:50:25 +0100 Subject: [PATCH 57/84] Broadcast GID is now calculated by IPoIB layer. 
--- src/drivers/net/ipoib.c | 103 +++++++++++++++++++++++++++- src/drivers/net/mlx_ipoib/mt25218.c | 27 ++------ src/include/gpxe/infiniband.h | 25 ++++++- 3 files changed, 130 insertions(+), 25 deletions(-) diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c index 81011405..c4eea5a7 100644 --- a/src/drivers/net/ipoib.c +++ b/src/drivers/net/ipoib.c @@ -84,6 +84,8 @@ struct ipoib_device { struct ipoib_queue_set data; /** Data queue set */ struct ipoib_queue_set meta; + /** Broadcast GID */ + struct ib_gid broadcast_gid; }; /** @@ -112,6 +114,15 @@ static struct ipoib_cached_path ipoib_path_cache[IPOIB_NUM_CACHED_PATHS]; /** Oldest IPoIB path cache entry index */ static unsigned int ipoib_path_cache_idx = 0; +/** IPoIB metadata TID */ +static uint32_t ipoib_meta_tid = 0; + +/** IPv4 broadcast GID */ +static const struct ib_gid ipv4_broadcast_gid = { + { { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } } +}; + /**************************************************************************** * * IPoIB link layer @@ -319,7 +330,6 @@ static int ipoib_get_path_record ( struct ipoib_device *ipoib, struct io_buffer *iobuf; struct ib_mad_path_record *path_record; struct ib_address_vector av; - static uint32_t tid = 0; int rc; /* Allocate I/O buffer */ @@ -336,7 +346,7 @@ static int ipoib_get_path_record ( struct ipoib_device *ipoib, path_record->mad_hdr.class_version = 2; path_record->mad_hdr.method = IB_MGMT_METHOD_GET; path_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC ); - path_record->mad_hdr.tid = tid++; + path_record->mad_hdr.tid = ipoib_meta_tid++; path_record->sa_hdr.comp_mask[1] = htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID ); memcpy ( &path_record->dgid, gid, sizeof ( path_record->dgid ) ); @@ -361,6 +371,65 @@ static int ipoib_get_path_record ( struct ipoib_device *ipoib, return 0; } +/** + * Transmit multicast group membership request + * + * @v ipoib IPoIB device + * @v gid Multicast GID + * @v join Join (rather than leave) group + * @ret rc Return status code + */ +static int ipoib_mc_member_record ( struct ipoib_device *ipoib, + struct ib_gid *gid, int join ) { + struct ib_device *ibdev = ipoib->ibdev; + struct io_buffer *iobuf; + struct ib_mad_mc_member_record *mc_member_record; + struct ib_address_vector av; + int rc; + + /* Allocate I/O buffer */ + iobuf = alloc_iob ( sizeof ( *mc_member_record ) ); + if ( ! iobuf ) + return -ENOMEM; + iob_put ( iobuf, sizeof ( *mc_member_record ) ); + mc_member_record = iobuf->data; + memset ( mc_member_record, 0, sizeof ( *mc_member_record ) ); + + /* Construct path record request */ + mc_member_record->mad_hdr.base_version = IB_MGMT_BASE_VERSION; + mc_member_record->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; + mc_member_record->mad_hdr.class_version = 2; + mc_member_record->mad_hdr.method = + ( join ? 
IB_MGMT_METHOD_SET : IB_MGMT_METHOD_DELETE ); + mc_member_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC ); + mc_member_record->mad_hdr.tid = ipoib_meta_tid++; + mc_member_record->sa_hdr.comp_mask[1] = + htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | + IB_SA_MCMEMBER_REC_JOIN_STATE ); + mc_member_record->scope__join_state = 1; + memcpy ( &mc_member_record->mgid, gid, + sizeof ( mc_member_record->mgid ) ); + memcpy ( &mc_member_record->port_gid, &ibdev->port_gid, + sizeof ( mc_member_record->port_gid ) ); + + /* Construct address vector */ + memset ( &av, 0, sizeof ( av ) ); + av.dlid = ibdev->sm_lid; + av.dest_qp = IB_SA_QPN; + av.qkey = IB_GLOBAL_QKEY; + + /* Post send request */ + if ( ( rc = ib_post_send ( ibdev, ipoib->meta.qp, &av, + iobuf ) ) != 0 ) { + DBGC ( ipoib, "IPoIB %p could not send get path record: %s\n", + ipoib, strerror ( rc ) ); + free_iob ( iobuf ); + return rc; + } + + return 0; +} + /** * Transmit packet via IPoIB network device * @@ -591,7 +660,7 @@ static int ipoib_open ( struct net_device *netdev ) { /* Attach to broadcast multicast GID */ if ( ( rc = ib_mcast_attach ( ibdev, ipoib->data.qp, - &ibdev->broadcast_gid ) ) != 0 ) { + &ipoib->broadcast_gid ) ) != 0 ) { DBG ( "Could not attach to broadcast GID: %s\n", strerror ( rc ) ); return rc; @@ -628,6 +697,27 @@ static struct net_device_operations ipoib_operations = { .irq = ipoib_irq, }; +/** + * Join IPoIB broadcast group + * + * @v ipoib IPoIB device + * @ret rc Return status code + */ +int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) { + int rc; + + /* Send join request */ + if ( ( rc = ipoib_mc_member_record ( ipoib, &ipoib->broadcast_gid, + 1 ) ) != 0 ) { + DBGC ( ipoib, "IPoIB %p could not send broadcast join: %s\n", + ipoib, strerror ( rc ) ); + return rc; + } + + + return 0; +} + /** * Probe IPoIB device * @@ -652,6 +742,11 @@ int ipoib_probe ( struct ib_device *ibdev ) { ipoib->netdev = netdev; ipoib->ibdev = ibdev; + /* Calculate broadcast GID */ + memcpy ( &ipoib->broadcast_gid, &ipv4_broadcast_gid, + sizeof ( ipoib->broadcast_gid ) ); + ipoib->broadcast_gid.u.words[2] = htons ( ibdev->pkey ); + /* Allocate metadata queue set */ if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->meta, IPOIB_META_NUM_CQES, @@ -663,6 +758,8 @@ int ipoib_probe ( struct ib_device *ibdev ) { goto err_create_meta_qset; } + + /* Allocate data queue set */ if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data, IPOIB_DATA_NUM_CQES, diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 5f3c3c4d..920874cc 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -1251,25 +1251,13 @@ static int arbel_get_sm_lid ( struct arbel *arbel, return 0; } -static int arbel_get_broadcast_gid ( struct arbel *arbel, - struct ib_gid *broadcast_gid ) { - static const struct ib_gid ipv4_broadcast_gid = { - { { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } } - }; +static int arbel_get_pkey ( struct arbel *arbel, unsigned long *pkey ) { struct ib_mad_pkey_table pkey_table; int rc; - /* Start with the IPv4 broadcast GID */ - memcpy ( broadcast_gid, &ipv4_broadcast_gid, - sizeof ( *broadcast_gid ) ); - - /* Add partition key */ if ( ( rc = arbel_get_pkey_table ( arbel, &pkey_table ) ) != 0 ) return rc; - memcpy ( &broadcast_gid->u.bytes[4], &pkey_table.pkey[0][0], - sizeof ( pkey_table.pkey[0][0] ) ); - + *pkey = ntohs ( pkey_table.pkey[0][0] ); return 0; } @@ -1340,12 +1328,11 @@ static 
int arbel_probe ( struct pci_device *pci, goto err_get_port_gid; } - /* Get broadcast GID */ - if ( ( rc = arbel_get_broadcast_gid ( arbel, - &ibdev->broadcast_gid ) ) != 0 ){ - DBGC ( arbel, "Arbel %p could not determine broadcast GID: " + /* Get partition key */ + if ( ( rc = arbel_get_pkey ( arbel, &ibdev->pkey ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not determine partition key: " "%s\n", arbel, strerror ( rc ) ); - goto err_get_broadcast_gid; + goto err_get_pkey; } struct ud_av_st *bcast_av = ib_data.bcast_av; @@ -1370,7 +1357,7 @@ static int arbel_probe ( struct pci_device *pci, return 0; err_ipoib_probe: - err_get_broadcast_gid: + err_get_pkey: err_get_port_gid: err_get_sm_lid: err_query_dev_lim: diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 845c4c22..8e358bce 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -20,6 +20,7 @@ struct ib_gid { union { uint8_t bytes[16]; + uint16_t words[8]; uint32_t dwords[4]; } u; }; @@ -258,10 +259,10 @@ struct ib_device_operations { struct ib_device { /** Port GID */ struct ib_gid port_gid; - /** Broadcast GID */ - struct ib_gid broadcast_gid; /** Subnet manager LID */ unsigned long sm_lid; + /** Partition key */ + unsigned int pkey; /** Underlying device */ struct device *dev; /** Infiniband operations */ @@ -541,6 +542,26 @@ struct ib_mad_path_record { uint32_t reserved2[35]; } __attribute__ (( packed )); +struct ib_mad_mc_member_record { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + struct ib_sa_hdr sa_hdr; + struct ib_gid mgid; + struct ib_gid port_gid; + uint32_t qkey; + uint16_t mlid; + uint8_t mtu_selector__mtu; + uint8_t tclass; + uint16_t pkey; + uint8_t rate_selector__rate; + uint8_t packet_lifetime_selector__packet_lifetime; + uint32_t sl__flow_label__hop_limit; + uint8_t scope__join_state; + uint8_t proxy_join__reserved; + uint16_t reserved0; + uint32_t reserved1[37]; +} __attribute__ (( packed )); + union ib_mad { struct ib_mad_hdr mad_hdr; struct ib_mad_data data; From bdac59172695b746b48550a1ebff02c405e7b3f8 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 13:31:13 +0100 Subject: [PATCH 58/84] Some interesting packet corruption happening now. 
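Besides the broadcast group join, metadata requests are now tagged: the upper half of the MAD transaction ID carries a fixed per-request-type value, so the metadata receive completion handler can tell path record replies from multicast member record replies.  A minimal sketch of that dispatch, assuming the IPOIB_TID_* tags and the two handler functions added below; the GRH stripping, length checks and debug output of the real handler are elided.

    static void sketch_meta_dispatch ( struct ipoib_device *ipoib,
                                       union ib_mad *mad ) {
            /* Drop replies that the subnet administrator rejected */
            if ( mad->mad_hdr.status != 0 )
                    return;

            /* tid[0] is the request-type tag; tid[1] is a counter */
            switch ( mad->mad_hdr.tid[0] ) {
            case IPOIB_TID_GET_PATH_REC:
                    ipoib_recv_path_record ( ipoib, &mad->path_record );
                    break;
            case IPOIB_TID_MC_MEMBER_REC:
                    ipoib_recv_mc_member_record ( ipoib,
                                                  &mad->mc_member_record );
                    break;
            default:
                    /* Unsolicited or unknown reply: ignore it */
                    break;
            }
    }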
--- src/drivers/net/ipoib.c | 223 ++++++++++++++++++++++------ src/drivers/net/mlx_ipoib/mt25218.c | 1 + src/include/gpxe/infiniband.h | 10 +- 3 files changed, 188 insertions(+), 46 deletions(-) diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c index c4eea5a7..077912b3 100644 --- a/src/drivers/net/ipoib.c +++ b/src/drivers/net/ipoib.c @@ -18,9 +18,11 @@ #include #include +#include #include #include #include +#include "timer.h" #include #include #include @@ -86,6 +88,12 @@ struct ipoib_device { struct ipoib_queue_set meta; /** Broadcast GID */ struct ib_gid broadcast_gid; + /** Broadcast LID */ + unsigned int broadcast_lid; + /** Joined to broadcast group */ + int broadcast_joined; + /** Data queue key */ + unsigned long data_qkey; }; /** @@ -114,6 +122,12 @@ static struct ipoib_cached_path ipoib_path_cache[IPOIB_NUM_CACHED_PATHS]; /** Oldest IPoIB path cache entry index */ static unsigned int ipoib_path_cache_idx = 0; +/** TID half used to identify get path record replies */ +#define IPOIB_TID_GET_PATH_REC 0x11111111UL + +/** TID half used to identify multicast member record replies */ +#define IPOIB_TID_MC_MEMBER_REC 0x22222222UL + /** IPoIB metadata TID */ static uint32_t ipoib_meta_tid = 0; @@ -123,6 +137,9 @@ static const struct ib_gid ipv4_broadcast_gid = { 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } } }; +/** Maximum time we will wait for the broadcast join to succeed */ +#define IPOIB_JOIN_MAX_DELAY_MS 1000 + /**************************************************************************** * * IPoIB link layer @@ -181,8 +198,8 @@ static int ipoib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) { /* Sanity check */ if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) { - DBG ( "IPoIB packet too short (%d bytes)\n", - iob_len ( iobuf ) ); + DBG ( "IPoIB packet too short for link-layer header\n" ); + DBG_HD ( iobuf->data, iob_len ( iobuf ) ); free_iob ( iobuf ); return -EINVAL; } @@ -346,7 +363,8 @@ static int ipoib_get_path_record ( struct ipoib_device *ipoib, path_record->mad_hdr.class_version = 2; path_record->mad_hdr.method = IB_MGMT_METHOD_GET; path_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC ); - path_record->mad_hdr.tid = ipoib_meta_tid++; + path_record->mad_hdr.tid[0] = IPOIB_TID_GET_PATH_REC; + path_record->mad_hdr.tid[1] = ipoib_meta_tid++; path_record->sa_hdr.comp_mask[1] = htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID ); memcpy ( &path_record->dgid, gid, sizeof ( path_record->dgid ) ); @@ -402,7 +420,8 @@ static int ipoib_mc_member_record ( struct ipoib_device *ipoib, mc_member_record->mad_hdr.method = ( join ? 
IB_MGMT_METHOD_SET : IB_MGMT_METHOD_DELETE ); mc_member_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC ); - mc_member_record->mad_hdr.tid = ipoib_meta_tid++; + mc_member_record->mad_hdr.tid[0] = IPOIB_TID_MC_MEMBER_REC; + mc_member_record->mad_hdr.tid[1] = ipoib_meta_tid++; mc_member_record->sa_hdr.comp_mask[1] = htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_JOIN_STATE ); @@ -443,19 +462,29 @@ static int ipoib_transmit ( struct net_device *netdev, struct ib_device *ibdev = ipoib->ibdev; struct ipoib_pseudo_hdr *ipoib_pshdr = iobuf->data; struct ib_address_vector av; + struct ib_gid *gid; struct ipoib_cached_path *path; int rc; + /* Sanity check */ if ( iob_len ( iobuf ) < sizeof ( *ipoib_pshdr ) ) { DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib ); return -EINVAL; } + iob_pull ( iobuf, ( sizeof ( *ipoib_pshdr ) ) ); /* Construct address vector */ memset ( &av, 0, sizeof ( av ) ); + av.qkey = IB_GLOBAL_QKEY; + av.gid_present = 1; if ( ipoib_pshdr->peer.qpn == htonl ( IPOIB_BROADCAST_QPN ) ) { /* Broadcast address */ +#if 0 memcpy ( &av, &hack_ipoib_bcast_av, sizeof ( av ) ); +#endif + av.dest_qp = IB_BROADCAST_QPN; + av.dlid = ipoib->broadcast_lid; + gid = &ipoib->broadcast_gid; } else { /* Unicast - look in path cache */ path = ipoib_find_cached_path ( &ipoib_pshdr->peer.gid ); @@ -467,15 +496,13 @@ static int ipoib_transmit ( struct net_device *netdev, return rc; } av.dest_qp = ntohl ( ipoib_pshdr->peer.qpn ); - av.qkey = IB_GLOBAL_QKEY; av.dlid = path->dlid; av.rate = path->rate; av.sl = path->sl; - av.gid_present = 1; - memcpy ( &av.gid, &ipoib_pshdr->peer.gid, sizeof ( av.gid ) ); + gid = &ipoib_pshdr->peer.gid; } + memcpy ( &av.gid, gid, sizeof ( av.gid ) ); - iob_pull ( iobuf, ( sizeof ( *ipoib_pshdr ) ) ); return ib_post_send ( ibdev, ipoib->data.qp, &av, iobuf ); } @@ -515,14 +542,33 @@ static void ipoib_data_complete_recv ( struct ib_device *ibdev __unused, if ( completion->syndrome ) { netdev_rx_err ( netdev, iobuf, -EIO ); - } else { - iob_put ( iobuf, completion->len ); - iob_pull ( iobuf, ( sizeof ( struct ib_global_route_header ) - - sizeof ( *ipoib_pshdr ) ) ); - /* FIXME: fill in a MAC address for the sake of AoE! */ - netdev_rx ( netdev, iobuf ); + goto done; } + iob_put ( iobuf, completion->len ); + if ( iob_len ( iobuf ) < sizeof ( struct ib_global_route_header ) ) { + DBGC ( ipoib, "IPoIB %p received data packet too short to " + "contain GRH\n", ipoib ); + DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) ); + netdev_rx_err ( netdev, iobuf, -EIO ); + goto done; + } + iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) ); + + if ( iob_len ( iobuf ) < sizeof ( struct ipoib_real_hdr ) ) { + DBGC ( ipoib, "IPoIB %p received data packet too short to " + "contain IPoIB header\n", ipoib ); + DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) ); + netdev_rx_err ( netdev, iobuf, -EIO ); + goto done; + } + + ipoib_pshdr = iob_push ( iobuf, sizeof ( *ipoib_pshdr ) ); + /* FIXME: fill in a MAC address for the sake of AoE! 
*/ + + netdev_rx ( netdev, iobuf ); + + done: ipoib->data.recv_fill--; } @@ -548,6 +594,52 @@ static void ipoib_meta_complete_send ( struct ib_device *ibdev __unused, free_iob ( iobuf ); } +/** + * Handle received IPoIB path record + * + * @v ipoib IPoIB device + * @v path_record Path record + */ +static void ipoib_recv_path_record ( struct ipoib_device *ipoib __unused, + struct ib_mad_path_record *path_record ) { + struct ipoib_cached_path *path; + + /* Update path cache entry */ + path = &ipoib_path_cache[ipoib_path_cache_idx]; + memcpy ( &path->gid, &path_record->dgid, sizeof ( path->gid ) ); + path->dlid = ntohs ( path_record->dlid ); + path->sl = ( path_record->reserved__sl & 0x0f ); + path->rate = ( path_record->rate_selector__rate & 0x3f ); + + DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx dlid %x sl %x rate %x\n", + htonl ( path->gid.u.dwords[0] ), htonl ( path->gid.u.dwords[1] ), + htonl ( path->gid.u.dwords[2] ), htonl ( path->gid.u.dwords[3] ), + path->dlid, path->sl, path->rate ); + + /* Update path cache index */ + ipoib_path_cache_idx++; + if ( ipoib_path_cache_idx == IPOIB_NUM_CACHED_PATHS ) + ipoib_path_cache_idx = 0; +} + +/** + * Handle received IPoIB multicast membership record + * + * @v ipoib IPoIB device + * @v mc_member_record Multicast membership record + */ +static void ipoib_recv_mc_member_record ( struct ipoib_device *ipoib, + struct ib_mad_mc_member_record *mc_member_record ) { + /* Record parameters */ + ipoib->broadcast_joined = + ( mc_member_record->scope__join_state & 0x0f ); + ipoib->data_qkey = ntohl ( mc_member_record->qkey ); + ipoib->broadcast_lid = ntohs ( mc_member_record->mlid ); + DBGC ( ipoib, "IPoIB %p %s broadcast group: qkey %lx mlid %x\n", + ipoib, ( ipoib->broadcast_joined ? "joined" : "left" ), + ipoib->data_qkey, ipoib->broadcast_lid ); +} + /** * Handle IPoIB metadata receive completion * @@ -562,36 +654,51 @@ static void ipoib_meta_complete_recv ( struct ib_device *ibdev __unused, struct io_buffer *iobuf ) { struct net_device *netdev = qp->owner_priv; struct ipoib_device *ipoib = netdev->priv; - struct ib_mad_path_record *path_record; - struct ipoib_cached_path *path; + union ib_mad *mad; if ( completion->syndrome ) { DBGC ( ipoib, "IPoIB %p metadata RX completion error %x\n", ipoib, completion->syndrome ); - } else { - /* Update path cache */ - iob_put ( iobuf, completion->len ); - iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) ); - path_record = iobuf->data; - path = &ipoib_path_cache[ipoib_path_cache_idx]; - memcpy ( &path->gid, &path_record->dgid, - sizeof ( path->gid ) ); - path->dlid = ntohs ( path_record->dlid ); - path->sl = ( path_record->reserved__sl & 0x0f ); - path->rate = ( path_record->rate_selector__rate & 0x3f ); - DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx dlid %x sl %x rate %x\n", - htonl ( path->gid.u.dwords[0] ), - htonl ( path->gid.u.dwords[1] ), - htonl ( path->gid.u.dwords[2] ), - htonl ( path->gid.u.dwords[3] ), - path->dlid, path->sl, path->rate ); - - /* Update path cache index */ - ipoib_path_cache_idx++; - if ( ipoib_path_cache_idx == IPOIB_NUM_CACHED_PATHS ) - ipoib_path_cache_idx = 0; + goto done; } + iob_put ( iobuf, completion->len ); + if ( iob_len ( iobuf ) < sizeof ( struct ib_global_route_header ) ) { + DBGC ( ipoib, "IPoIB %p received metadata packet too short " + "to contain GRH\n", ipoib ); + DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) ); + goto done; + } + iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) ); + if ( iob_len ( iobuf ) < sizeof ( *mad ) ) { + DBGC ( ipoib, "IPoIB %p received 
metadata packet too short " + "to contain reply\n", ipoib ); + DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) ); + goto done; + } + mad = iobuf->data; + + if ( mad->mad_hdr.status != 0 ) { + DBGC ( ipoib, "IPoIB %p metadata RX err status %04x\n", + ipoib, ntohs ( mad->mad_hdr.status ) ); + goto done; + } + + switch ( mad->mad_hdr.tid[0] ) { + case IPOIB_TID_GET_PATH_REC: + ipoib_recv_path_record ( ipoib, &mad->path_record ); + break; + case IPOIB_TID_MC_MEMBER_REC: + ipoib_recv_mc_member_record ( ipoib, &mad->mc_member_record ); + break; + default: + DBGC ( ipoib, "IPoIB %p unwanted response:\n", + ipoib ); + DBGC_HD ( ipoib, mad, sizeof ( *mad ) ); + break; + } + + done: ipoib->meta.recv_fill--; free_iob ( iobuf ); } @@ -628,10 +735,10 @@ static void ipoib_poll ( struct net_device *netdev ) { struct ipoib_device *ipoib = netdev->priv; struct ib_device *ibdev = ipoib->ibdev; - ib_poll_cq ( ibdev, ipoib->data.cq, ipoib_data_complete_send, - ipoib_data_complete_recv ); ib_poll_cq ( ibdev, ipoib->meta.cq, ipoib_meta_complete_send, ipoib_meta_complete_recv ); + ib_poll_cq ( ibdev, ipoib->data.cq, ipoib_data_complete_send, + ipoib_data_complete_recv ); ipoib_refill_recv ( ipoib, &ipoib->meta ); ipoib_refill_recv ( ipoib, &ipoib->data ); } @@ -703,9 +810,14 @@ static struct net_device_operations ipoib_operations = { * @v ipoib IPoIB device * @ret rc Return status code */ -int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) { +static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) { + struct ib_device *ibdev = ipoib->ibdev; + unsigned int delay_ms; int rc; + /* Make sure we have some receive descriptors */ + ipoib_refill_recv ( ipoib, &ipoib->meta ); + /* Send join request */ if ( ( rc = ipoib_mc_member_record ( ipoib, &ipoib->broadcast_gid, 1 ) ) != 0 ) { @@ -714,8 +826,23 @@ int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) { return rc; } + /* Wait for join to complete. Ideally we wouldn't delay for + * this long, but we need the queue key before we can set up + * the data queue pair, which we need before we can know the + * MAC address. 
+ */ + for ( delay_ms = IPOIB_JOIN_MAX_DELAY_MS ; delay_ms ; delay_ms-- ) { + mdelay ( 1 ); + ib_poll_cq ( ibdev, ipoib->meta.cq, ipoib_meta_complete_send, + ipoib_meta_complete_recv ); + ipoib_refill_recv ( ipoib, &ipoib->meta ); + if ( ipoib->broadcast_joined ) + return 0; + } + DBGC ( ipoib, "IPoIB %p timed out waiting for broadcast join\n", + ipoib ); - return 0; + return -ETIMEDOUT; } /** @@ -758,14 +885,23 @@ int ipoib_probe ( struct ib_device *ibdev ) { goto err_create_meta_qset; } +#if 0 + ipoib->data_qkey = hack_ipoib_qkey; +#endif + /* Join broadcast group */ + if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) { + DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n", + ipoib, strerror ( rc ) ); + goto err_join_broadcast_group; + } /* Allocate data queue set */ if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data, IPOIB_DATA_NUM_CQES, IPOIB_DATA_NUM_SEND_WQES, IPOIB_DATA_NUM_RECV_WQES, - hack_ipoib_qkey ) ) != 0 ) { + ipoib->data_qkey ) ) != 0 ) { DBGC ( ipoib, "IPoIB %p could not allocate data QP: %s\n", ipoib, strerror ( rc ) ); goto err_create_data_qset; @@ -784,6 +920,7 @@ int ipoib_probe ( struct ib_device *ibdev ) { err_register_netdev: ipoib_destroy_qset ( ipoib, &ipoib->data ); + err_join_broadcast_group: err_create_data_qset: ipoib_destroy_qset ( ipoib, &ipoib->meta ); err_create_meta_qset: diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 920874cc..dc497add 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -975,6 +975,7 @@ static int arbel_complete ( struct ib_device *ibdev, */ static void arbel_drain_eq ( struct arbel *arbel ) { #warning "drain the event queue" + drain_eq(); } /** diff --git a/src/include/gpxe/infiniband.h b/src/include/gpxe/infiniband.h index 8e358bce..06745ba9 100644 --- a/src/include/gpxe/infiniband.h +++ b/src/include/gpxe/infiniband.h @@ -11,10 +11,13 @@ #include /** Subnet administrator QPN */ -#define IB_SA_QPN 1 +#define IB_SA_QPN 1 + +/** Broadcast QPN */ +#define IB_BROADCAST_QPN 0xffffffUL /** Subnet administrator queue key */ -#define IB_GLOBAL_QKEY 0x80010000UL +#define IB_GLOBAL_QKEY 0x80010000UL /** An Infiniband Global Identifier */ struct ib_gid { @@ -466,7 +469,7 @@ struct ib_mad_hdr { uint8_t method; uint16_t status; uint16_t class_specific; - uint64_t tid; + uint32_t tid[2]; uint16_t attr_id; uint16_t resv; uint32_t attr_mod; @@ -569,6 +572,7 @@ union ib_mad { struct ib_mad_port_info port_info; struct ib_mad_pkey_table pkey_table; struct ib_mad_path_record path_record; + struct ib_mad_mc_member_record mc_member_record; } __attribute__ (( packed )); #endif /* _GPXE_INFINIBAND_H */ From 2c56ede6f80127b1a352f4bafc94821fa98f127e Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 18:38:04 +0100 Subject: [PATCH 59/84] Moved iobuf.h assertions outside the static inline functions, so that the assert message's file and line number gives some clue as to the real location of the problem. Added similar assertions to list.h. 
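The pattern being applied here, sketched standalone so the mechanics are visible outside the diff (the struct and function names below are illustrative only, not the gPXE API): keep the real work in a static inline function, then shadow it with a same-named function-like macro whose body calls the function and performs the assert(). Because the macro expands at each call site, a failed assertion reports the caller's file and line rather than iobuf.h's or list.h's. The GNU statement-expression extension ( { ... } ) is what lets the macro still yield a return value.

    #include <assert.h>
    #include <stddef.h>

    struct buf {                    /* illustrative stand-in for struct io_buffer */
        void *data;
        void *tail;
        void *end;
    };

    /* Real work stays in the inline function... */
    static inline void * buf_put ( struct buf *buf, size_t len ) {
        void *old_tail = buf->tail;
        buf->tail += len;
        return old_tail;
    }

    /* ...and the same-named macro adds the check at the call site, so
     * assert() reports the caller's __FILE__/__LINE__ on failure.  The
     * macro name is not re-expanded inside its own replacement list, so
     * the inner call still reaches the inline function.
     */
    #define buf_put( buf, len ) ( {                         \
        void *__result = buf_put ( (buf), (len) );          \
        assert ( (buf)->tail <= (buf)->end );               \
        __result; } )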
--- src/include/gpxe/iobuf.h | 28 ++++++++++++++++++++++++---- src/include/gpxe/list.h | 18 ++++++++++++++++++ 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/src/include/gpxe/iobuf.h b/src/include/gpxe/iobuf.h index e3db01ac..ff787754 100644 --- a/src/include/gpxe/iobuf.h +++ b/src/include/gpxe/iobuf.h @@ -67,9 +67,13 @@ struct io_buffer { static inline void * iob_reserve ( struct io_buffer *iobuf, size_t len ) { iobuf->data += len; iobuf->tail += len; - assert ( iobuf->tail <= iobuf->end ); return iobuf->data; } +#define iob_reserve( iobuf, len ) ( { \ + void *__result; \ + __result = iob_reserve ( (iobuf), (len) ); \ + assert ( (iobuf)->tail <= (iobuf)->end ); \ + __result; } ) /** * Add data to start of I/O buffer @@ -80,9 +84,13 @@ static inline void * iob_reserve ( struct io_buffer *iobuf, size_t len ) { */ static inline void * iob_push ( struct io_buffer *iobuf, size_t len ) { iobuf->data -= len; - assert ( iobuf->data >= iobuf->head ); return iobuf->data; } +#define iob_push( iobuf, len ) ( { \ + void *__result; \ + __result = iob_push ( (iobuf), (len) ); \ + assert ( (iobuf)->data >= (iobuf)->head ); \ + __result; } ) /** * Remove data from start of I/O buffer @@ -96,6 +104,11 @@ static inline void * iob_pull ( struct io_buffer *iobuf, size_t len ) { assert ( iobuf->data <= iobuf->tail ); return iobuf->data; } +#define iob_pull( iobuf, len ) ( { \ + void *__result; \ + __result = iob_pull ( (iobuf), (len) ); \ + assert ( (iobuf)->data <= (iobuf)->tail ); \ + __result; } ) /** * Add data to end of I/O buffer @@ -107,9 +120,13 @@ static inline void * iob_pull ( struct io_buffer *iobuf, size_t len ) { static inline void * iob_put ( struct io_buffer *iobuf, size_t len ) { void *old_tail = iobuf->tail; iobuf->tail += len; - assert ( iobuf->tail <= iobuf->end ); return old_tail; } +#define iob_put( iobuf, len ) ( { \ + void *__result; \ + __result = iob_put ( (iobuf), (len) ); \ + assert ( (iobuf)->tail <= (iobuf)->end ); \ + __result; } ) /** * Remove data from end of I/O buffer @@ -119,8 +136,11 @@ static inline void * iob_put ( struct io_buffer *iobuf, size_t len ) { */ static inline void iob_unput ( struct io_buffer *iobuf, size_t len ) { iobuf->tail -= len; - assert ( iobuf->tail >= iobuf->data ); } +#define iob_unput( iobuf, len ) do { \ + iob_unput ( (iobuf), (len) ); \ + assert ( (iobuf)->tail >= (iobuf)->data ); \ + } while ( 0 ) /** * Empty an I/O buffer diff --git a/src/include/gpxe/list.h b/src/include/gpxe/list.h index 0e65901c..602382be 100644 --- a/src/include/gpxe/list.h +++ b/src/include/gpxe/list.h @@ -10,6 +10,7 @@ */ #include +#include /* * Simple doubly linked list implementation. 
@@ -62,6 +63,11 @@ static inline void __list_add ( struct list_head *new, static inline void list_add ( struct list_head *new, struct list_head *head ) { __list_add ( new, head, head->next ); } +#define list_add( new, head ) do { \ + assert ( (head)->next->prev == (head) ); \ + assert ( (head)->prev->next == (head) ); \ + list_add ( (new), (head) ); \ + } while ( 0 ) /** * Add a new entry to the tail of a list @@ -76,6 +82,11 @@ static inline void list_add_tail ( struct list_head *new, struct list_head *head ) { __list_add ( new, head->prev, head ); } +#define list_add_tail( new, head ) do { \ + assert ( (head)->next->prev == (head) ); \ + assert ( (head)->prev->next == (head) ); \ + list_add_tail ( (new), (head) ); \ + } while ( 0 ) /* * Delete a list entry by making the prev/next entries @@ -101,6 +112,13 @@ static inline void __list_del ( struct list_head * prev, static inline void list_del ( struct list_head *entry ) { __list_del ( entry->prev, entry->next ); } +#define list_del( entry ) do { \ + assert ( (entry)->prev != NULL ); \ + assert ( (entry)->next != NULL ); \ + assert ( (entry)->next->prev == (entry) ); \ + assert ( (entry)->prev->next == (entry) ); \ + list_del ( (entry) ); \ + } while ( 0 ) /** * Test whether a list is empty From 387a1a85561bd828d82bc4b69ca18dab5fc10831 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 18:57:32 +0100 Subject: [PATCH 60/84] Minor debug message improvement. --- src/net/netdevice.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/net/netdevice.c b/src/net/netdevice.c index 460de89c..59a50205 100644 --- a/src/net/netdevice.c +++ b/src/net/netdevice.c @@ -476,8 +476,9 @@ static void net_step ( struct process *process __unused ) { * NIC faster than they arrive. */ if ( ( iobuf = netdev_rx_dequeue ( netdev ) ) ) { - DBGC ( netdev, "NETDEV %p processing %p\n", - netdev, iobuf ); + DBGC ( netdev, "NETDEV %p processing %p (%p+%zx)\n", + netdev, iobuf, iobuf->data, + iob_len ( iobuf ) ); netdev->ll_protocol->rx ( iobuf, netdev ); } } From 267a4483abade6f352263cf159052608a0c03833 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 19:01:10 +0100 Subject: [PATCH 61/84] Added an almost obscene amount of debugging and assertion code while tracking down a bug that turned out to be a free_iob() used where I needed a netdev_tx_complete(). This left the freed I/O buffer on the net device's TX list, with bad, bad consequences later. Also fixed the bug in question. 
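The ownership rule behind the fix, shown as a minimal sketch (the helper resolve_peer() is hypothetical; only netdev_tx_complete() and free_iob() are real calls, both visible in the hunks below): once a buffer has been handed to a net device's transmit path it sits on that device's TX list, so it must be released with netdev_tx_complete(), which unlinks it before freeing. Calling free_iob() directly frees the memory while the TX list still references it, which is the corruption described above.

    #include <gpxe/iobuf.h>
    #include <gpxe/netdevice.h>

    /* resolve_peer() stands in for any step that can fail mid-transmit,
     * such as the path record lookup in ipoib_transmit() below.
     */
    extern int resolve_peer ( struct io_buffer *iobuf );

    static int example_transmit ( struct net_device *netdev,
                                  struct io_buffer *iobuf ) {
        int rc;

        if ( ( rc = resolve_peer ( iobuf ) ) != 0 ) {
            /* Wrong: free_iob ( iobuf ) here would leave the freed
             * buffer linked on netdev's TX list.
             * Right: complete the transmission, which unlinks and
             * frees the buffer in one step.
             */
            netdev_tx_complete ( netdev, iobuf );
            return rc;
        }
        /* ... hand the buffer to the hardware ... */
        return 0;
    }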
--- src/drivers/net/ipoib.c | 22 ++++++++++++------ src/drivers/net/mlx_ipoib/mt25218.c | 36 +++++++++++++++++++++++++++-- src/net/infiniband.c | 22 ++++++++++++------ 3 files changed, 64 insertions(+), 16 deletions(-) diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c index 077912b3..f45012ee 100644 --- a/src/drivers/net/ipoib.c +++ b/src/drivers/net/ipoib.c @@ -47,22 +47,22 @@ extern struct ib_address_vector hack_ipoib_bcast_av; #define IPOIB_MTU 2048 /** Number of IPoIB data send work queue entries */ -#define IPOIB_DATA_NUM_SEND_WQES 4 +#define IPOIB_DATA_NUM_SEND_WQES 2 /** Number of IPoIB data receive work queue entries */ -#define IPOIB_DATA_NUM_RECV_WQES 4 +#define IPOIB_DATA_NUM_RECV_WQES 2 /** Number of IPoIB data completion entries */ -#define IPOIB_DATA_NUM_CQES 8 +#define IPOIB_DATA_NUM_CQES 32 /** Number of IPoIB metadata send work queue entries */ -#define IPOIB_META_NUM_SEND_WQES 4 +#define IPOIB_META_NUM_SEND_WQES 2 /** Number of IPoIB metadata receive work queue entries */ -#define IPOIB_META_NUM_RECV_WQES 4 +#define IPOIB_META_NUM_RECV_WQES 2 /** Number of IPoIB metadata completion entries */ -#define IPOIB_META_NUM_CQES 8 +#define IPOIB_META_NUM_CQES 32 /** An IPoIB queue set */ struct ipoib_queue_set { @@ -205,6 +205,14 @@ static int ipoib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) { } /* Strip off IPoIB header */ + int len = iob_len ( iobuf ); + DBG ( "WTF iob_len = %zd\n", len ); + if ( len < 0 ) { + DBG_HD ( iobuf, sizeof ( *iobuf ) ); + DBG ( "locking\n" ); + while ( 1 ) {} + } + iob_pull ( iobuf, sizeof ( *ipoib_hdr ) ); /* Hand off to network-layer protocol */ @@ -492,7 +500,7 @@ static int ipoib_transmit ( struct net_device *netdev, /* No path entry - get path record */ rc = ipoib_get_path_record ( ipoib, &ipoib_pshdr->peer.gid ); - free_iob ( iobuf ); + netdev_tx_complete ( netdev, iobuf ); return rc; } av.dest_qp = ntohl ( ipoib_pshdr->peer.qpn ); diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index dc497add..8afee52f 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -398,6 +398,9 @@ static int arbel_create_cq ( struct ib_device *ibdev, goto err_sw2hw_cq; } + DBGC ( arbel, "Arbel %p CQN %#lx ring at [%p,%p)\n", + arbel, cq->cqn, arbel_cq->cqe, + ( ( ( void * ) arbel_cq->cqe ) + arbel_cq->cqe_size ) ); cq->dev_priv = arbel_cq; return 0; @@ -650,6 +653,12 @@ static int arbel_create_qp ( struct ib_device *ibdev, goto err_rtr2rts_qpee; } + DBGC ( arbel, "Arbel %p QPN %#lx send ring at [%p,%p)\n", + arbel, qp->qpn, arbel_qp->send.wqe, + ( ( (void *) arbel_qp->send.wqe ) + arbel_qp->send.wqe_size ) ); + DBGC ( arbel, "Arbel %p QPN %#lx receive ring at [%p,%p)\n", + arbel, qp->qpn, arbel_qp->recv.wqe, + ( ( (void *) arbel_qp->recv.wqe ) + arbel_qp->recv.wqe_size ) ); qp->dev_priv = arbel_qp; return 0; @@ -904,6 +913,7 @@ static int arbel_complete ( struct ib_device *ibdev, struct arbel_queue_pair *arbel_qp; struct arbel_send_work_queue *arbel_send_wq; struct arbel_recv_work_queue *arbel_recv_wq; + struct arbelprm_recv_wqe *recv_wqe; struct io_buffer *iobuf; ib_completer_t complete; unsigned int opcode; @@ -915,7 +925,6 @@ static int arbel_complete ( struct ib_device *ibdev, /* Parse completion */ memset ( &completion, 0, sizeof ( completion ) ); - completion.len = MLX_GET ( &cqe->normal, byte_cnt ); qpn = MLX_GET ( &cqe->normal, my_qpn ); is_send = MLX_GET ( &cqe->normal, s ); wqe_adr = ( MLX_GET ( &cqe->normal, wqe_adr ) << 6 ); @@ -946,10 +955,12 @@ static 
int arbel_complete ( struct ib_device *ibdev, arbel_send_wq = &arbel_qp->send; wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_send_wq->wqe ) ) / sizeof ( arbel_send_wq->wqe[0] ) ); + assert ( wqe_idx < qp->send.num_wqes ); } else { arbel_recv_wq = &arbel_qp->recv; wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_recv_wq->wqe ) ) / sizeof ( arbel_recv_wq->wqe[0] ) ); + assert ( wqe_idx < qp->recv.num_wqes ); } /* Identify I/O buffer */ @@ -961,6 +972,27 @@ static int arbel_complete ( struct ib_device *ibdev, } wq->iobufs[wqe_idx] = NULL; + /* Fill in length for received packets */ + if ( ! is_send ) { + completion.len = MLX_GET ( &cqe->normal, byte_cnt ); + recv_wqe = &arbel_recv_wq->wqe[wqe_idx].recv; + assert ( MLX_GET ( &recv_wqe->data[0], local_address_l ) == + virt_to_bus ( iobuf->data ) ); + assert ( MLX_GET ( &recv_wqe->data[0], byte_count ) == + iob_tailroom ( iobuf ) ); + DBG ( "CPQ %lx QPN %lx WQE %x\n", cq->cqn, qp->qpn, wqe_idx ); + // DBG_HD ( iobuf, sizeof ( *iobuf ) ); + MLX_FILL_1 ( &recv_wqe->data[0], 0, byte_count, 0 ); + MLX_FILL_1 ( &recv_wqe->data[0], 1, + l_key, ARBEL_INVALID_LKEY ); + if ( completion.len > iob_tailroom ( iobuf ) ) { + DBGC ( arbel, "Arbel %p CQN %lx QPN %lx IDX %x " + "overlength received packet length %zd\n", + arbel, cq->cqn, qpn, wqe_idx, completion.len ); + return -EIO; + } + } + /* Pass off to caller's completion handler */ complete = ( is_send ? complete_send : complete_recv ); complete ( ibdev, qp, &completion, iobuf ); @@ -1252,7 +1284,7 @@ static int arbel_get_sm_lid ( struct arbel *arbel, return 0; } -static int arbel_get_pkey ( struct arbel *arbel, unsigned long *pkey ) { +static int arbel_get_pkey ( struct arbel *arbel, unsigned int *pkey ) { struct ib_mad_pkey_table pkey_table; int rc; diff --git a/src/net/infiniband.c b/src/net/infiniband.c index 9d38767f..ed186d18 100644 --- a/src/net/infiniband.c +++ b/src/net/infiniband.c @@ -64,8 +64,8 @@ struct ib_completion_queue * ib_create_cq ( struct ib_device *ibdev, return NULL; } - DBGC ( ibdev, "IBDEV %p created completion queue %#lx\n", - ibdev, cq->cqn ); + DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) " + "with CQN %#lx\n", ibdev, num_cqes, cq, cq->dev_priv, cq->cqn ); return cq; } @@ -102,14 +102,16 @@ struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, struct ib_completion_queue *recv_cq, unsigned long qkey ) { struct ib_queue_pair *qp; + size_t total_size; int rc; DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev ); /* Allocate and initialise data structure */ - qp = zalloc ( sizeof ( *qp ) + - ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) + - ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) ); + total_size = ( sizeof ( *qp ) + + ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) + + ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) ); + qp = zalloc ( total_size ); if ( ! 
qp ) return NULL; qp->qkey = qkey; @@ -134,8 +136,14 @@ struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev, return NULL; } - DBGC ( ibdev, "IBDEV %p created queue pair %#lx\n", - ibdev, qp->qpn ); + DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n", + ibdev, qp, qp->dev_priv, qp->qpn ); + DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n", + ibdev, qp->qpn, num_send_wqes, qp->send.iobufs, + qp->recv.iobufs ); + DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n", + ibdev, qp->qpn, num_send_wqes, qp->recv.iobufs, + ( ( ( void * ) qp ) + total_size ) ); return qp; } From 11541b1e01efa6aab9955675c4145d6728b88d75 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 19:12:06 +0100 Subject: [PATCH 62/84] Remove hacks, and fix leaving the multicast GID. --- src/drivers/net/ipoib.c | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c index f45012ee..e5bee93d 100644 --- a/src/drivers/net/ipoib.c +++ b/src/drivers/net/ipoib.c @@ -34,15 +34,6 @@ * IP over Infiniband */ - - - - -extern unsigned long hack_ipoib_qkey; -extern struct ib_address_vector hack_ipoib_bcast_av; - - - /** IPoIB MTU */ #define IPOIB_MTU 2048 @@ -205,14 +196,6 @@ static int ipoib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) { } /* Strip off IPoIB header */ - int len = iob_len ( iobuf ); - DBG ( "WTF iob_len = %zd\n", len ); - if ( len < 0 ) { - DBG_HD ( iobuf, sizeof ( *iobuf ) ); - DBG ( "locking\n" ); - while ( 1 ) {} - } - iob_pull ( iobuf, sizeof ( *ipoib_hdr ) ); /* Hand off to network-layer protocol */ @@ -487,9 +470,6 @@ static int ipoib_transmit ( struct net_device *netdev, av.gid_present = 1; if ( ipoib_pshdr->peer.qpn == htonl ( IPOIB_BROADCAST_QPN ) ) { /* Broadcast address */ -#if 0 - memcpy ( &av, &hack_ipoib_bcast_av, sizeof ( av ) ); -#endif av.dest_qp = IB_BROADCAST_QPN; av.dlid = ipoib->broadcast_lid; gid = &ipoib->broadcast_gid; @@ -798,7 +778,7 @@ static void ipoib_close ( struct net_device *netdev ) { struct ib_device *ibdev = ipoib->ibdev; /* Detach from broadcast multicast GID */ - ib_mcast_detach ( ibdev, ipoib->data.qp, &ipoib_broadcast.gid ); + ib_mcast_detach ( ibdev, ipoib->data.qp, &ipoib->broadcast_gid ); /* FIXME: should probably flush the receive ring */ } @@ -893,10 +873,6 @@ int ipoib_probe ( struct ib_device *ibdev ) { goto err_create_meta_qset; } -#if 0 - ipoib->data_qkey = hack_ipoib_qkey; -#endif - /* Join broadcast group */ if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) { DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n", From 12b4cb964bb8240d4f5c43a9f2aac96d3163ca3b Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 19:17:55 +0100 Subject: [PATCH 63/84] Removed more hacks, and adjusted number of queue entries for likely usage. 
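A plausible reading of the new numbers (an inference; the commit does not spell it out): each IPoIB queue set funnels both its send and its receive completions into a single completion queue, so the CQ only needs room for one completion per work queue entry that can be outstanding, and the old 32-entry allocation was mostly waste. A compile-time restatement of that constraint, using the post-patch data-queue values:

    #define IPOIB_DATA_NUM_SEND_WQES 2
    #define IPOIB_DATA_NUM_RECV_WQES 4
    #define IPOIB_DATA_NUM_CQES 8

    /* Fails to compile if the data CQ cannot hold a completion for
     * every outstanding send and receive WQE.
     */
    typedef char ipoib_data_cq_size_check
        [ ( IPOIB_DATA_NUM_CQES >=
            ( IPOIB_DATA_NUM_SEND_WQES + IPOIB_DATA_NUM_RECV_WQES ) )
          ? 1 : -1 ];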
--- src/drivers/net/ipoib.c | 6 +++--- src/drivers/net/mlx_ipoib/mt25218.c | 19 ------------------- 2 files changed, 3 insertions(+), 22 deletions(-) diff --git a/src/drivers/net/ipoib.c b/src/drivers/net/ipoib.c index e5bee93d..784c0720 100644 --- a/src/drivers/net/ipoib.c +++ b/src/drivers/net/ipoib.c @@ -41,10 +41,10 @@ #define IPOIB_DATA_NUM_SEND_WQES 2 /** Number of IPoIB data receive work queue entries */ -#define IPOIB_DATA_NUM_RECV_WQES 2 +#define IPOIB_DATA_NUM_RECV_WQES 4 /** Number of IPoIB data completion entries */ -#define IPOIB_DATA_NUM_CQES 32 +#define IPOIB_DATA_NUM_CQES 8 /** Number of IPoIB metadata send work queue entries */ #define IPOIB_META_NUM_SEND_WQES 2 @@ -53,7 +53,7 @@ #define IPOIB_META_NUM_RECV_WQES 2 /** Number of IPoIB metadata completion entries */ -#define IPOIB_META_NUM_CQES 32 +#define IPOIB_META_NUM_CQES 8 /** An IPoIB queue set */ struct ipoib_queue_set { diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 8afee52f..452ae0b5 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -30,11 +30,6 @@ Skeleton NIC driver for Etherboot #include "arbel.h" -struct ib_address_vector hack_ipoib_bcast_av; - - - - @@ -980,8 +975,6 @@ static int arbel_complete ( struct ib_device *ibdev, virt_to_bus ( iobuf->data ) ); assert ( MLX_GET ( &recv_wqe->data[0], byte_count ) == iob_tailroom ( iobuf ) ); - DBG ( "CPQ %lx QPN %lx WQE %x\n", cq->cqn, qp->qpn, wqe_idx ); - // DBG_HD ( iobuf, sizeof ( *iobuf ) ); MLX_FILL_1 ( &recv_wqe->data[0], 0, byte_count, 0 ); MLX_FILL_1 ( &recv_wqe->data[0], 1, l_key, ARBEL_INVALID_LKEY ); @@ -1368,18 +1361,6 @@ static int arbel_probe ( struct pci_device *pci, goto err_get_pkey; } - struct ud_av_st *bcast_av = ib_data.bcast_av; - struct arbelprm_ud_address_vector *bav = - ( struct arbelprm_ud_address_vector * ) &bcast_av->av; - struct ib_address_vector *av = &hack_ipoib_bcast_av; - av->dest_qp = bcast_av->dest_qp; - av->qkey = bcast_av->qkey; - av->dlid = MLX_GET ( bav, rlid ); - av->rate = ( MLX_GET ( bav, max_stat_rate ) ? 1 : 4 ); - av->sl = MLX_GET ( bav, sl ); - av->gid_present = 1; - memcpy ( &av->gid, ( ( void * ) bav ) + 16, 16 ); - /* Add IPoIB device */ if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) { DBGC ( arbel, "Arbel %p could not add IPoIB device: %s\n", From f3fcb53faf14079954e0f65edac931b09819b03c Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 19:47:19 +0100 Subject: [PATCH 64/84] Don't use the mailboxless version of the HW2SW_CQ command; it seems to crash the machine. 
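For orientation, the two wrapper forms involved, condensed from the hunk that follows (names suffixed _old/_new here purely for the comparison): the mailboxless form passes no output buffer and a non-zero opcode modifier, while the replacement always supplies a DMA output mailbox that the firmware fills with the saved CQ context, even though this driver then discards it.

    /* Condensed from the hunk below -- the old, mailboxless wrapper: */
    static inline int
    arbel_cmd_hw2sw_cq_old ( struct arbel *arbel, unsigned long cqn ) {
        return arbel_cmd ( arbel,
                           ARBEL_HCR_VOID_CMD ( ARBEL_HCR_HW2SW_CQ ),
                           1, NULL, cqn, NULL );
    }

    /* ...and the replacement, which provides an output mailbox for the
     * saved CQ context (the caller just throws it away):
     */
    static inline int
    arbel_cmd_hw2sw_cq_new ( struct arbel *arbel, unsigned long cqn,
                             struct arbelprm_completion_queue_context *cqctx ) {
        return arbel_cmd ( arbel,
                           ARBEL_HCR_OUT_CMD ( ARBEL_HCR_HW2SW_CQ,
                                               1, sizeof ( *cqctx ) ),
                           0, NULL, cqn, cqctx );
    }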
--- src/drivers/net/mlx_ipoib/mt25218.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 452ae0b5..699c9a2d 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -225,10 +225,12 @@ arbel_cmd_sw2hw_cq ( struct arbel *arbel, unsigned long cqn, } static inline int -arbel_cmd_hw2sw_cq ( struct arbel *arbel, unsigned long cqn ) { +arbel_cmd_hw2sw_cq ( struct arbel *arbel, unsigned long cqn, + struct arbelprm_completion_queue_context *cqctx) { return arbel_cmd ( arbel, - ARBEL_HCR_VOID_CMD ( ARBEL_HCR_HW2SW_CQ ), - 1, NULL, cqn, NULL ); + ARBEL_HCR_OUT_CMD ( ARBEL_HCR_HW2SW_CQ, + 1, sizeof ( *cqctx ) ), + 0, NULL, cqn, cqctx ); } static inline int @@ -421,13 +423,14 @@ static void arbel_destroy_cq ( struct ib_device *ibdev, struct ib_completion_queue *cq ) { struct arbel *arbel = ibdev->dev_priv; struct arbel_completion_queue *arbel_cq = cq->dev_priv; + struct arbelprm_completion_queue_context cqctx; struct arbelprm_cq_ci_db_record *ci_db_rec; struct arbelprm_cq_arm_db_record *arm_db_rec; int cqn_offset; int rc; /* Take ownership back from hardware */ - if ( ( rc = arbel_cmd_hw2sw_cq ( arbel, cq->cqn ) ) != 0 ) { + if ( ( rc = arbel_cmd_hw2sw_cq ( arbel, cq->cqn, &cqctx ) ) != 0 ) { DBGC ( arbel, "Arbel %p FATAL HW2SW_CQ failed on CQN %#lx: " "%s\n", arbel, cq->cqn, strerror ( rc ) ); /* Leak memory and return; at least we avoid corruption */ From 0f62bcaa775cf1c0e057a6ed4e4741df69efb49b Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 21:14:09 +0100 Subject: [PATCH 65/84] Start migrating the remaining initialisation steps. --- src/drivers/net/mlx_ipoib/arbel.h | 11 +++++ src/drivers/net/mlx_ipoib/mt25218.c | 63 +++++++++++++++++++++++++++-- 2 files changed, 70 insertions(+), 4 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index 28893f51..407f29d5 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -12,6 +12,13 @@ * */ +/* PCI BARs */ +#define ARBEL_PCI_CONFIG_BAR PCI_BASE_ADDRESS_0 +#define ARBEL_PCI_CONFIG_BAR_SIZE 0x100000 +#define ARBEL_PCI_UAR_BAR PCI_BASE_ADDRESS_2 +#define ARBEL_PCI_UAR_IDX 1 +#define ARBEL_PCI_UAR_SIZE 0x1000 + /* UAR context table (UCE) resource types */ #define ARBEL_UAR_RES_NONE 0x00 #define ARBEL_UAR_RES_CQ_CI 0x01 @@ -27,6 +34,7 @@ /* HCA command register opcodes */ #define ARBEL_HCR_QUERY_DEV_LIM 0x0003 +#define ARBEL_HCR_QUERY_FW 0x0004 #define ARBEL_HCR_SW2HW_CQ 0x0016 #define ARBEL_HCR_HW2SW_CQ 0x0017 #define ARBEL_HCR_RST2INIT_QPEE 0x0019 @@ -74,6 +82,7 @@ struct MLX_DECLARE_STRUCT ( arbelprm_mgm_hash ); struct MLX_DECLARE_STRUCT ( arbelprm_qp_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_qp_ee_state_transitions ); struct MLX_DECLARE_STRUCT ( arbelprm_query_dev_lim ); +struct MLX_DECLARE_STRUCT ( arbelprm_query_fw ); struct MLX_DECLARE_STRUCT ( arbelprm_queue_pair_ee_context_entry ); struct MLX_DECLARE_STRUCT ( arbelprm_recv_wqe_segment_next ); struct MLX_DECLARE_STRUCT ( arbelprm_send_doorbell ); @@ -270,6 +279,8 @@ struct arbel { #define ARBEL_HCR_BASE 0x80680 #define ARBEL_HCR_REG(x) ( ARBEL_HCR_BASE + 4 * (x) ) #define ARBEL_HCR_MAX_WAIT_MS 2000 +#define ARBEL_MBOX_ALIGN 4096 +#define ARBEL_MBOX_SIZE 512 /* HCA command is split into * diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 699c9a2d..c7cc8c76 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ 
b/src/drivers/net/mlx_ipoib/mt25218.c @@ -133,6 +133,9 @@ static int arbel_cmd ( struct arbel *arbel, unsigned long command, unsigned int i; int rc; + assert ( in_len <= ARBEL_MBOX_SIZE ); + assert ( out_len <= ARBEL_MBOX_SIZE ); + DBGC2 ( arbel, "Arbel %p command %02x in %zx%s out %zx%s\n", arbel, opcode, in_len, ( ( command & ARBEL_HCR_IN_MBOX ) ? "(mbox)" : "" ), out_len, @@ -210,11 +213,19 @@ static inline int arbel_cmd_query_dev_lim ( struct arbel *arbel, struct arbelprm_query_dev_lim *dev_lim ) { return arbel_cmd ( arbel, - ARBEL_HCR_OUT_CMD ( ARBEL_HCR_QUERY_DEV_LIM, + ARBEL_HCR_OUT_CMD ( ARBEL_HCR_QUERY_DEV_LIM, 1, sizeof ( *dev_lim ) ), 0, NULL, 0, dev_lim ); } +static inline int +arbel_cmd_query_fw ( struct arbel *arbel, struct arbelprm_query_fw *fw ) { + return arbel_cmd ( arbel, + ARBEL_HCR_OUT_CMD ( ARBEL_HCR_QUERY_FW, + 1, sizeof ( *fw ) ), + 0, NULL, 0, fw ); +} + static inline int arbel_cmd_sw2hw_cq ( struct arbel *arbel, unsigned long cqn, const struct arbelprm_completion_queue_context *cqctx ) { @@ -1300,6 +1311,7 @@ static int arbel_get_pkey ( struct arbel *arbel, unsigned int *pkey ) { static int arbel_probe ( struct pci_device *pci, const struct pci_device_id *id __unused ) { struct ib_device *ibdev; + struct arbelprm_query_fw fw; struct arbelprm_query_dev_lim dev_lim; struct arbel *arbel; udqp_t qph; @@ -1307,26 +1319,59 @@ static int arbel_probe ( struct pci_device *pci, /* Allocate Infiniband device */ ibdev = alloc_ibdev ( sizeof ( *arbel ) ); - if ( ! ibdev ) - return -ENOMEM; + if ( ! ibdev ) { + rc = -ENOMEM; + goto err_ibdev; + } ibdev->op = &arbel_ib_operations; pci_set_drvdata ( pci, ibdev ); ibdev->dev = &pci->dev; arbel = ibdev->dev_priv; memset ( arbel, 0, sizeof ( *arbel ) ); + /* Allocate space for mailboxes */ + arbel->mailbox_in = malloc_dma ( ARBEL_MBOX_SIZE, ARBEL_MBOX_ALIGN ); + if ( ! arbel->mailbox_in ) { + rc = -ENOMEM; + goto err_mailbox_in; + } + arbel->mailbox_out = malloc_dma ( ARBEL_MBOX_SIZE, ARBEL_MBOX_ALIGN ); + if ( ! 
arbel->mailbox_out ) { + rc = -ENOMEM; + goto err_mailbox_out; + } + /* Fix up PCI device */ adjust_pci_device ( pci ); + /* Get PCI BARs */ + arbel->config = ioremap ( pci_bar_start ( pci, ARBEL_PCI_CONFIG_BAR ), + ARBEL_PCI_CONFIG_BAR_SIZE ); + arbel->uar = ioremap ( ( pci_bar_start ( pci, ARBEL_PCI_UAR_BAR ) + + ARBEL_PCI_UAR_IDX * ARBEL_PCI_UAR_SIZE ), + ARBEL_PCI_UAR_SIZE ); + + /* Initialise firmware */ + if ( ( rc = arbel_cmd_query_fw ( arbel, &fw ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not query firmware: %s\n", + arbel, strerror ( rc ) ); + goto err_query_fw; + } + DBGC ( arbel, "Arbel %p firmware version %ld.%ld.%ld\n", arbel, + MLX_GET ( &fw, fw_rev_major ), MLX_GET ( &fw, fw_rev_minor ), + MLX_GET ( &fw, fw_rev_subminor ) ); + /* Initialise hardware */ if ( ( rc = ib_driver_init ( pci, &qph ) ) != 0 ) goto err_ib_driver_init; /* Hack up IB structures */ +#if 0 arbel->config = memfree_pci_dev.cr_space; + arbel->uar = memfree_pci_dev.uar; arbel->mailbox_in = dev_buffers_p->inprm_buf; arbel->mailbox_out = dev_buffers_p->outprm_buf; - arbel->uar = memfree_pci_dev.uar; +#endif arbel->db_rec = dev_ib_data.uar_context_base; arbel->reserved_lkey = dev_ib_data.mkey; arbel->eqn = dev_ib_data.eq.eqn; @@ -1380,7 +1425,13 @@ static int arbel_probe ( struct pci_device *pci, err_query_dev_lim: ib_driver_close ( 0 ); err_ib_driver_init: + err_query_fw: + free_dma ( arbel->mailbox_out, ARBEL_MBOX_SIZE ); + err_mailbox_out: + free_dma ( arbel->mailbox_in, ARBEL_MBOX_SIZE ); + err_mailbox_in: free_ibdev ( ibdev ); + err_ibdev: return rc; } @@ -1391,9 +1442,13 @@ static int arbel_probe ( struct pci_device *pci, */ static void arbel_remove ( struct pci_device *pci ) { struct ib_device *ibdev = pci_get_drvdata ( pci ); + struct arbel *arbel = ibdev->dev_priv; ipoib_remove ( ibdev ); ib_driver_close ( 0 ); + free_dma ( arbel->mailbox_out, ARBEL_MBOX_SIZE ); + free_dma ( arbel->mailbox_in, ARBEL_MBOX_SIZE ); + free_ibdev ( ibdev ); } static struct pci_device_id arbel_nics[] = { From dc3165b746641c8595c1ee574bf63ddc2eb274e9 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 21:59:41 +0100 Subject: [PATCH 66/84] Starting the firmware directly now works. 
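The shape of the new firmware bring-up, condensed from the arbel_start_firmware() added below (error handling and debug output stripped; this is a summary of the patch, not extra driver code):

    static int arbel_start_firmware_outline ( struct arbel *arbel ) {
        struct arbelprm_query_fw fw;
        struct arbelprm_virtual_physical_mapping map_fa;
        unsigned int fw_pages;
        unsigned int log2_fw_pages;
        size_t fw_size;
        physaddr_t fw_base;

        /* 1. Ask the device how many 4kB pages of firmware area it
         *    wants, rounded up to a power of two.
         */
        arbel_cmd_query_fw ( arbel, &fw );
        log2_fw_pages = fls ( MLX_GET ( &fw, fw_pages ) - 1 );
        fw_pages = ( 1 << log2_fw_pages );

        /* 2. Allocate a naturally-aligned firmware area in external
         *    memory.
         */
        fw_size = ( fw_pages * 4096 );
        arbel->firmware_area = umalloc ( fw_size );
        fw_base = ( user_to_phys ( arbel->firmware_area, fw_size ) &
                    ~( fw_size - 1 ) );

        /* 3. Tell the device where the area lives (MAP_FA), then start
         *    the firmware running (RUN_FW).
         */
        memset ( &map_fa, 0, sizeof ( map_fa ) );
        MLX_FILL_2 ( &map_fa, 3,
                     log2size, log2_fw_pages,
                     pa_l, ( fw_base >> 12 ) );
        arbel_cmd_map_fa ( arbel, &map_fa );
        return arbel_cmd_run_fw ( arbel );
    }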
--- src/drivers/net/mlx_ipoib/arbel.h | 17 ++- src/drivers/net/mlx_ipoib/mt25218.c | 166 ++++++++++++++++++++++++---- 2 files changed, 159 insertions(+), 24 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index 407f29d5..564b669e 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -7,6 +7,9 @@ * */ +#include +#include + /* * Hardware constants * @@ -45,6 +48,9 @@ #define ARBEL_HCR_READ_MGM 0x0025 #define ARBEL_HCR_WRITE_MGM 0x0026 #define ARBEL_HCR_MGID_HASH 0x0027 +#define ARBEL_HCR_RUN_FW 0x0ff6 +#define ARBEL_HCR_UNMAP_FA 0x0ffe +#define ARBEL_HCR_MAP_FA 0x0fff /* Service types */ #define ARBEL_ST_UD 0x03 @@ -87,6 +93,7 @@ struct MLX_DECLARE_STRUCT ( arbelprm_queue_pair_ee_context_entry ); struct MLX_DECLARE_STRUCT ( arbelprm_recv_wqe_segment_next ); struct MLX_DECLARE_STRUCT ( arbelprm_send_doorbell ); struct MLX_DECLARE_STRUCT ( arbelprm_ud_address_vector ); +struct MLX_DECLARE_STRUCT ( arbelprm_virtual_physical_mapping ); struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_ctrl_send ); struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_data_ptr ); struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_next ); @@ -240,15 +247,19 @@ typedef uint32_t arbel_bitmask_t; /** An Arbel device */ struct arbel { - /** Configuration registers */ + /** PCI configuration registers */ void *config; + /** PCI user Access Region */ + void *uar; + /** Command input mailbox */ void *mailbox_in; /** Command output mailbox */ void *mailbox_out; - /** User Access Region */ - void *uar; + /** Firmware area in external memory */ + userptr_t firmware_area; + /** Doorbell records */ union arbelprm_doorbell_record *db_rec; /** Reserved LKey diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index c7cc8c76..26677ab6 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -315,6 +315,29 @@ arbel_cmd_mgid_hash ( struct arbel *arbel, const struct ib_gid *gid, 0, gid, 0, hash ); } +static inline int +arbel_cmd_run_fw ( struct arbel *arbel ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_RUN_FW ), + 0, NULL, 0, NULL ); +} + +static inline int +arbel_cmd_unmap_fa ( struct arbel *arbel ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_UNMAP_FA ), + 0, NULL, 0, NULL ); +} + +static inline int +arbel_cmd_map_fa ( struct arbel *arbel, + const struct arbelprm_virtual_physical_mapping *map_fa ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_MAP_FA, + 1, sizeof ( *map_fa ) ), + 0, map_fa, 1, NULL ); +} + /*************************************************************************** * * Completion queue operations @@ -958,15 +981,15 @@ static int arbel_complete ( struct ib_device *ibdev, } qp = wq->qp; arbel_qp = qp->dev_priv; + arbel_send_wq = &arbel_qp->send; + arbel_recv_wq = &arbel_qp->recv; /* Identify work queue entry index */ if ( is_send ) { - arbel_send_wq = &arbel_qp->send; wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_send_wq->wqe ) ) / sizeof ( arbel_send_wq->wqe[0] ) ); assert ( wqe_idx < qp->send.num_wqes ); } else { - arbel_recv_wq = &arbel_qp->recv; wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_recv_wq->wqe ) ) / sizeof ( arbel_recv_wq->wqe[0] ) ); assert ( wqe_idx < qp->recv.num_wqes ); @@ -1177,6 +1200,12 @@ static struct ib_device_operations arbel_ib_operations = { .mcast_detach = arbel_mcast_detach, }; +/*************************************************************************** + * + * MAD IFC operations + * + 
*************************************************************************** + */ static int arbel_mad_ifc ( struct arbel *arbel, union arbelprm_mad *mad ) { @@ -1301,6 +1330,102 @@ static int arbel_get_pkey ( struct arbel *arbel, unsigned int *pkey ) { return 0; } +/*************************************************************************** + * + * Firmware control + * + *************************************************************************** + */ + +/** + * Start firmware running + * + * @v arbel Arbel device + * @ret rc Return status code + */ +static int arbel_start_firmware ( struct arbel *arbel ) { + struct arbelprm_query_fw fw; + struct arbelprm_virtual_physical_mapping map_fa; + unsigned int fw_pages; + unsigned int log2_fw_pages; + size_t fw_size; + physaddr_t fw_base; + int rc; + + /* Get firmware parameters */ + if ( ( rc = arbel_cmd_query_fw ( arbel, &fw ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not query firmware: %s\n", + arbel, strerror ( rc ) ); + goto err_query_fw; + } + DBGC ( arbel, "Arbel %p firmware version %ld.%ld.%ld\n", arbel, + MLX_GET ( &fw, fw_rev_major ), MLX_GET ( &fw, fw_rev_minor ), + MLX_GET ( &fw, fw_rev_subminor ) ); + fw_pages = MLX_GET ( &fw, fw_pages ); + log2_fw_pages = fls ( fw_pages - 1 ); + fw_pages = ( 1 << log2_fw_pages ); + DBGC ( arbel, "Arbel %p requires %d kB for firmware\n", + arbel, ( fw_pages * 4 ) ); + + /* Allocate firmware pages and map firmware area */ + fw_size = ( fw_pages * 4096 ); + arbel->firmware_area = umalloc ( fw_size ); + if ( ! arbel->firmware_area ) { + rc = -ENOMEM; + goto err_alloc_fa; + } + fw_base = ( user_to_phys ( arbel->firmware_area, fw_size ) & + ~( fw_size - 1 ) ); + DBGC ( arbel, "Arbel %p firmware area at physical [%lx,%lx)\n", + arbel, fw_base, ( fw_base + fw_size ) ); + memset ( &map_fa, 0, sizeof ( map_fa ) ); + MLX_FILL_2 ( &map_fa, 3, + log2size, log2_fw_pages, + pa_l, ( fw_base >> 12 ) ); + if ( ( rc = arbel_cmd_map_fa ( arbel, &map_fa ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not map firmware: %s\n", + arbel, strerror ( rc ) ); + goto err_map_fa; + } + + /* Start firmware */ + if ( ( rc = arbel_cmd_run_fw ( arbel ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not run firmware: %s\n", + arbel, strerror ( rc ) ); + goto err_run_fw; + } + + DBGC ( arbel, "Arbel %p firmware started\n", arbel ); + return 0; + + err_run_fw: + arbel_cmd_unmap_fa ( arbel ); + err_map_fa: + ufree ( arbel->firmware_area ); + arbel->firmware_area = UNULL; + err_alloc_fa: + err_query_fw: + return rc; +} + +/** + * Stop firmware running + * + * @v arbel Arbel device + */ +static void arbel_stop_firmware ( struct arbel *arbel ) { + int rc; + + if ( ( rc = arbel_cmd_unmap_fa ( arbel ) ) != 0 ) { + DBGC ( arbel, "Arbel %p FATAL could not stop firmware: %s\n", + arbel, strerror ( rc ) ); + /* Leak memory and return; at least we avoid corruption */ + return; + } + ufree ( arbel->firmware_area ); + arbel->firmware_area = UNULL; +} + /** * Probe PCI device * @@ -1311,7 +1436,6 @@ static int arbel_get_pkey ( struct arbel *arbel, unsigned int *pkey ) { static int arbel_probe ( struct pci_device *pci, const struct pci_device_id *id __unused ) { struct ib_device *ibdev; - struct arbelprm_query_fw fw; struct arbelprm_query_dev_lim dev_lim; struct arbel *arbel; udqp_t qph; @@ -1329,6 +1453,16 @@ static int arbel_probe ( struct pci_device *pci, arbel = ibdev->dev_priv; memset ( arbel, 0, sizeof ( *arbel ) ); + /* Fix up PCI device */ + adjust_pci_device ( pci ); + + /* Get PCI BARs */ + arbel->config = ioremap ( pci_bar_start ( pci, 
ARBEL_PCI_CONFIG_BAR ), + ARBEL_PCI_CONFIG_BAR_SIZE ); + arbel->uar = ioremap ( ( pci_bar_start ( pci, ARBEL_PCI_UAR_BAR ) + + ARBEL_PCI_UAR_IDX * ARBEL_PCI_UAR_SIZE ), + ARBEL_PCI_UAR_SIZE ); + /* Allocate space for mailboxes */ arbel->mailbox_in = malloc_dma ( ARBEL_MBOX_SIZE, ARBEL_MBOX_ALIGN ); if ( ! arbel->mailbox_in ) { @@ -1341,25 +1475,12 @@ static int arbel_probe ( struct pci_device *pci, goto err_mailbox_out; } - /* Fix up PCI device */ - adjust_pci_device ( pci ); + /* Start firmware */ + if ( ( rc = arbel_start_firmware ( arbel ) ) != 0 ) + goto err_start_firmware; - /* Get PCI BARs */ - arbel->config = ioremap ( pci_bar_start ( pci, ARBEL_PCI_CONFIG_BAR ), - ARBEL_PCI_CONFIG_BAR_SIZE ); - arbel->uar = ioremap ( ( pci_bar_start ( pci, ARBEL_PCI_UAR_BAR ) + - ARBEL_PCI_UAR_IDX * ARBEL_PCI_UAR_SIZE ), - ARBEL_PCI_UAR_SIZE ); - /* Initialise firmware */ - if ( ( rc = arbel_cmd_query_fw ( arbel, &fw ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not query firmware: %s\n", - arbel, strerror ( rc ) ); - goto err_query_fw; - } - DBGC ( arbel, "Arbel %p firmware version %ld.%ld.%ld\n", arbel, - MLX_GET ( &fw, fw_rev_major ), MLX_GET ( &fw, fw_rev_minor ), - MLX_GET ( &fw, fw_rev_subminor ) ); + while ( 1 ) {} /* Initialise hardware */ if ( ( rc = ib_driver_init ( pci, &qph ) ) != 0 ) @@ -1425,7 +1546,9 @@ static int arbel_probe ( struct pci_device *pci, err_query_dev_lim: ib_driver_close ( 0 ); err_ib_driver_init: - err_query_fw: + + arbel_stop_firmware ( arbel ); + err_start_firmware: free_dma ( arbel->mailbox_out, ARBEL_MBOX_SIZE ); err_mailbox_out: free_dma ( arbel->mailbox_in, ARBEL_MBOX_SIZE ); @@ -1446,6 +1569,7 @@ static void arbel_remove ( struct pci_device *pci ) { ipoib_remove ( ibdev ); ib_driver_close ( 0 ); + arbel_stop_firmware ( arbel ); free_dma ( arbel->mailbox_out, ARBEL_MBOX_SIZE ); free_dma ( arbel->mailbox_in, ARBEL_MBOX_SIZE ); free_ibdev ( ibdev ); From c818d47e258ae0571ff3dee143a1b76059fbcb30 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Mon, 17 Sep 2007 22:25:46 +0100 Subject: [PATCH 67/84] Restructured bits of initialisation (not testable at present) --- src/drivers/net/mlx_ipoib/arbel.h | 18 +++- src/drivers/net/mlx_ipoib/mt25218.c | 140 ++++++++++++++++++++-------- 2 files changed, 116 insertions(+), 42 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index 564b669e..fb4264c0 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -156,12 +156,20 @@ union arbelprm_mad { /** Arbel device limits */ struct arbel_dev_limits { - /** Number of reserver UARs */ - unsigned long reserved_uars; - /** Number of reserved CQs */ - unsigned long reserved_cqs; /** Number of reserved QPs */ - unsigned long reserved_qps; + unsigned int reserved_qps; + /** Number of reserved EEs */ + unsigned int reserved_ees; + /** Number of reserved MPTs */ + unsigned int reserved_mpts; + /** Number of reserved MTTs */ + unsigned int reserved_mtts; + /** Number of reserved CQs */ + unsigned int reserved_cqs; + /** Number of reserved SRQs */ + unsigned int reserved_srqs; + /** Number of reserver UARs */ + unsigned int reserved_uars; }; /** Alignment of Arbel send work queue entries */ diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 26677ab6..b9d2835d 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -1330,6 +1330,40 @@ static int arbel_get_pkey ( struct arbel *arbel, unsigned int *pkey ) { return 0; } +/** + * Get MAD 
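The arithmetic that drives the partitioning added here, shown once with worked numbers (this is a standalone copy of the icm_usage() helper from the patch; the entry sizes in the examples are invented for illustration):

    #include <stddef.h>

    /* Size consumed in ICM by one context table: the caller has already
     * rounded the entry count up to a power of two, and the total is
     * rounded up to a whole 4kB page.
     */
    static size_t icm_usage ( unsigned int log_num_entries,
                              size_t entry_size ) {
        size_t usage = ( ( 1 << log_num_entries ) * entry_size );
        return ( ( usage + 4095 ) & ~4095 );
    }

    /* Worked examples (entry sizes made up for illustration):
     *   icm_usage ( 3, 96 )  == 4096   -- 8 * 96  = 768, one full page
     *   icm_usage ( 6, 256 ) == 16384  -- 64 * 256 fills 4 pages exactly
     * Each table is placed at the running icm_offset, whose shifted
     * value is written into the matching INIT_HCA base-address field.
     */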
parameters + * + * @v arbel Arbel device + * @ret rc Return status code + */ +static int arbel_get_mad_params ( struct ib_device *ibdev ) { + struct arbel *arbel = ibdev->dev_priv; + int rc; + + /* Get subnet manager LID */ + if ( ( rc = arbel_get_sm_lid ( arbel, &ibdev->sm_lid ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not determine subnet manager " + "LID: %s\n", arbel, strerror ( rc ) ); + return rc; + } + + /* Get port GID */ + if ( ( rc = arbel_get_port_gid ( arbel, &ibdev->port_gid ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not determine port GID: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + + /* Get partition key */ + if ( ( rc = arbel_get_pkey ( arbel, &ibdev->pkey ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not determine partition key: " + "%s\n", arbel, strerror ( rc ) ); + return rc; + } + + return 0; +} + /*************************************************************************** * * Firmware control @@ -1426,6 +1460,62 @@ static void arbel_stop_firmware ( struct arbel *arbel ) { arbel->firmware_area = UNULL; } +/*************************************************************************** + * + * Infinihost Context Memory management + * + *************************************************************************** + */ + +/** + * Get device limits + * + * @v arbel Arbel device + * @ret rc Return status code + */ +static int arbel_get_limits ( struct arbel *arbel ) { + struct arbelprm_query_dev_lim dev_lim; + int rc; + + if ( ( rc = arbel_cmd_query_dev_lim ( arbel, &dev_lim ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not get device limits: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + + arbel->limits.reserved_qps = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_qps ) ); + arbel->limits.reserved_ees = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_ees ) ); + arbel->limits.reserved_mtts = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_mtts ) ); + arbel->limits.reserved_cqs = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_cqs ) ); + arbel->limits.reserved_srqs = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_srqs ) ); + arbel->limits.reserved_uars = MLX_GET ( &dev_lim, num_rsvd_uars ); + + return 0; +} + +/** + * Allocate ICM areas + * + * @v arbel Arbel device + * @ret rc Return status code + */ +static int arbel_alloc_icm ( struct arbel *arbel ) { + + return 0; +} + +/*************************************************************************** + * + * PCI interface + * + *************************************************************************** + */ + /** * Probe PCI device * @@ -1436,9 +1526,7 @@ static void arbel_stop_firmware ( struct arbel *arbel ) { static int arbel_probe ( struct pci_device *pci, const struct pci_device_id *id __unused ) { struct ib_device *ibdev; - struct arbelprm_query_dev_lim dev_lim; struct arbel *arbel; - udqp_t qph; int rc; /* Allocate Infiniband device */ @@ -1479,15 +1567,19 @@ static int arbel_probe ( struct pci_device *pci, if ( ( rc = arbel_start_firmware ( arbel ) ) != 0 ) goto err_start_firmware; + /* Get device limits */ + if ( ( rc = arbel_get_limits ( arbel ) ) != 0 ) + goto err_get_limits; while ( 1 ) {} +#if 0 /* Initialise hardware */ if ( ( rc = ib_driver_init ( pci, &qph ) ) != 0 ) goto err_ib_driver_init; /* Hack up IB structures */ -#if 0 + arbel->config = memfree_pci_dev.cr_space; arbel->uar = memfree_pci_dev.uar; arbel->mailbox_in = dev_buffers_p->inprm_buf; @@ -1497,38 +1589,11 @@ static int arbel_probe ( struct pci_device *pci, arbel->reserved_lkey = dev_ib_data.mkey; arbel->eqn = dev_ib_data.eq.eqn; - /* Get device limits */ - if ( ( rc = 
arbel_cmd_query_dev_lim ( arbel, &dev_lim ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not get device limits: %s\n", - arbel, strerror ( rc ) ); - goto err_query_dev_lim; - } - arbel->limits.reserved_uars = MLX_GET ( &dev_lim, num_rsvd_uars ); - arbel->limits.reserved_cqs = - ( 1 << MLX_GET ( &dev_lim, log2_rsvd_cqs ) ); - arbel->limits.reserved_qps = - ( 1 << MLX_GET ( &dev_lim, log2_rsvd_qps ) ); - /* Get subnet manager LID */ - if ( ( rc = arbel_get_sm_lid ( arbel, &ibdev->sm_lid ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not determine subnet manager " - "LID: %s\n", arbel, strerror ( rc ) ); - goto err_get_sm_lid; - } + /* Get MAD parameters */ + if ( ( rc = arbel_get_mad_params ( ibdev ) ) != 0 ) + goto err_get_mad_params; - /* Get port GID */ - if ( ( rc = arbel_get_port_gid ( arbel, &ibdev->port_gid ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not determine port GID: %s\n", - arbel, strerror ( rc ) ); - goto err_get_port_gid; - } - - /* Get partition key */ - if ( ( rc = arbel_get_pkey ( arbel, &ibdev->pkey ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not determine partition key: " - "%s\n", arbel, strerror ( rc ) ); - goto err_get_pkey; - } /* Add IPoIB device */ if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) { @@ -1540,13 +1605,12 @@ static int arbel_probe ( struct pci_device *pci, return 0; err_ipoib_probe: - err_get_pkey: - err_get_port_gid: - err_get_sm_lid: - err_query_dev_lim: + err_get_mad_params: ib_driver_close ( 0 ); err_ib_driver_init: + + err_get_limits: arbel_stop_firmware ( arbel ); err_start_firmware: free_dma ( arbel->mailbox_out, ARBEL_MBOX_SIZE ); @@ -1567,6 +1631,8 @@ static void arbel_remove ( struct pci_device *pci ) { struct ib_device *ibdev = pci_get_drvdata ( pci ); struct arbel *arbel = ibdev->dev_priv; + +#warning "check error sequence for probe()" ipoib_remove ( ibdev ); ib_driver_close ( 0 ); arbel_stop_firmware ( arbel ); From af88f602281aa532533f3d00baa8168d4f03cf35 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 18 Sep 2007 01:49:39 +0100 Subject: [PATCH 68/84] Started with the ICM partitioning code. 
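How the extra limits gathered here feed the partitioning, worked through once with invented numbers (the real values come from QUERY_DEV_LIM at run time, and the count of the driver's own QPs is assumed to be 8 purely for the arithmetic):

    /* From QUERY_DEV_LIM (example values only):
     *   log2_rsvd_qps = 4     ->  reserved_qps   = 1 << 4 = 16
     *   qpc_entry_sz  = 256   ->  qpc_entry_size = 256 bytes
     *
     * During partitioning the table is sized for the reserved QPs plus
     * the QPs this driver will create itself:
     *   log_num_qps = fls ( reserved_qps + ARBEL_MAX_QPS - 1 )
     *               = fls ( 16 + 8 - 1 ) = 5       (assuming 8 own QPs)
     *   table size  = icm_usage ( 5, 256 ) = 32 * 256 = 8192 bytes
     * i.e. two 4kB ICM pages for the QP context table in this example.
     */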
--- src/drivers/net/mlx_ipoib/arbel.h | 4 +- src/drivers/net/mlx_ipoib/mt25218.c | 116 +++++++++++++++++++++++++++- 2 files changed, 116 insertions(+), 4 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index fb4264c0..adcab36c 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -38,6 +38,7 @@ /* HCA command register opcodes */ #define ARBEL_HCR_QUERY_DEV_LIM 0x0003 #define ARBEL_HCR_QUERY_FW 0x0004 +#define ARBEL_HCR_INIT_HCA 0x0007 #define ARBEL_HCR_SW2HW_CQ 0x0016 #define ARBEL_HCR_HW2SW_CQ 0x0017 #define ARBEL_HCR_RST2INIT_QPEE 0x0019 @@ -82,6 +83,7 @@ struct MLX_DECLARE_STRUCT ( arbelprm_completion_with_error ); struct MLX_DECLARE_STRUCT ( arbelprm_cq_arm_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_hca_command_register ); +struct MLX_DECLARE_STRUCT ( arbelprm_init_hca ); struct MLX_DECLARE_STRUCT ( arbelprm_mad_ifc ); struct MLX_DECLARE_STRUCT ( arbelprm_mgm_entry ); struct MLX_DECLARE_STRUCT ( arbelprm_mgm_hash ); @@ -160,8 +162,6 @@ struct arbel_dev_limits { unsigned int reserved_qps; /** Number of reserved EEs */ unsigned int reserved_ees; - /** Number of reserved MPTs */ - unsigned int reserved_mpts; /** Number of reserved MTTs */ unsigned int reserved_mtts; /** Number of reserved CQs */ diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index b9d2835d..da77a03d 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -226,6 +226,15 @@ arbel_cmd_query_fw ( struct arbel *arbel, struct arbelprm_query_fw *fw ) { 0, NULL, 0, fw ); } +static inline int +arbel_cmd_init_hca ( struct arbel *arbel, + const struct arbelprm_init_hca *init_hca ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_INIT_HCA, + 1, sizeof ( *init_hca ) ), + 0, init_hca, 0, NULL ); +} + static inline int arbel_cmd_sw2hw_cq ( struct arbel *arbel, unsigned long cqn, const struct arbelprm_completion_queue_context *cqctx ) { @@ -1499,13 +1508,116 @@ static int arbel_get_limits ( struct arbel *arbel ) { } /** - * Allocate ICM areas + * Get ICM usage + * + * @v log_num_entries Log2 of the number of entries + * @v entry_size Entry size + * @ret usage Usage size in ICM + */ +static size_t icm_usage ( unsigned int log_num_entries, size_t entry_size ) { + size_t usage; + + usage = ( ( 1 << log_num_entries ) * entry_size ); + usage = ( ( usage + 4095 ) & ~4095 ); + return usage; +} + +/** + * Partition ICM * * @v arbel Arbel device * @ret rc Return status code */ static int arbel_alloc_icm ( struct arbel *arbel ) { - + struct arbelprm_init_hca init_hca; + size_t icm_offset = 0; + unsigned int log_num_qps, log_num_srqs, log_num_ees, log_num_cqs; + unsigned int log_num_mtts, log_num_mpts, log_num_rdbs, log_num_eqs; + + memset ( &init_hca, 0, sizeof ( init_hca ) ); + icm_offset = ( ( arbel->limits.reserved_uars + 1 ) << 12 ); + + /* Queue pair contexts */ + log_num_qps = fls ( arbel->limits.reserved_qps + ARBEL_MAX_QPS - 1 ); + MLX_FILL_2 ( &init_hca, 13, + qpc_eec_cqc_eqc_rdb_parameters.qpc_base_addr_l, + ( icm_offset >> 7 ), + qpc_eec_cqc_eqc_rdb_parameters.log_num_of_qp, + log_num_qps ); + icm_offset += icm_usage ( log_num_qps, arbel->limits.qpc_entry_size ); + + /* Extended queue pair contexts */ + MLX_FILL_1 ( &init_hca, 25, + qpc_eec_cqc_eqc_rdb_parameters.eqpc_base_addr_l, + icm_offset ); + icm_offset += icm_usage ( log_num_qps, arbel->limits.eqpc_entry_size ); + + /* Shared receive queue 
contexts */ + log_num_srqs = fls ( arbel->limits.reserved_srqs - 1 ); + MLX_FILL_2 ( &init_hca, 19, + qpc_eec_cqc_eqc_rdb_parameters.srqc_base_addr_l, + ( icm_offset >> 5 ), + qpc_eec_cqc_eqc_rdb_parameters.log_num_of_srq, + log_num_srqs ); + icm_offset += icm_usage ( log_num_srqs, arbel->limits.srqc_entry_size ); + + /* End-to-end contexts */ + log_num_ees = fls ( arbel->limits.reserved_ees - 1 ); + MLX_FILL_2 ( &init_hca, 17, + qpc_eec_cqc_eqc_rdb_parameters.eec_base_addr_l, + ( icm_offset >> 7 ), + qpc_eec_cqc_eqc_rdb_parameters.log_num_of_ee, + log_num_ees ); + icm_offset += icm_usage ( log_num_ees, arbel->limits.eec_entry_size ); + + /* Extended end-to-end contexts */ + MLX_FILL_1 ( &init_hca, 29, + qpc_eec_cqc_eqc_rdb_parameters.eeec_base_addr_l, + icm_offset ); + icm_offset += icm_usage ( log_num_ees, arbel->limits.eeec_entry_size ); + + /* Completion queue contexts */ + log_num_cqs = fls ( arbel->limits.reserved_cqs + ARBEL_MAX_CQS - 1 ); + MLX_FILL_2 ( &init_hca, 21, + qpc_eec_cqc_eqc_rdb_parameters.cqc_base_addr_l, + ( icm_offset >> 6 ), + qpc_eec_cqc_eqc_rdb_parameters.log_num_of_cq, + log_num_cqs ); + icm_offset += icm_usage ( log_num_cqs, arbel->limits.cqc_entry_size ); + + /* Memory translation table */ + log_num_mtts = fls ( arbel->limits.reserved_mtts - 1 ); + MLX_FILL_1 ( &init_hca, 65, + tpt_parameters.mtt_base_addr_l, icm_offset ); + icm_offset += icm_usage ( log_num_mtts, arbel->limits.mtt_entry_size ); + + /* Memory protection table */ + log_num_mpts = fls ( arbel->limits.reserved_mrws - 1 ); + MLX_FILL_1 ( &init_hca, 61, + tpt_parameters.mpt_base_adr_l, icm_offset ); + MLX_FILL_1 ( &init_hca, 62, + tpt_parameters.log_mpt_sz, log_num_mpts ); + icm_offset += icm_usage ( log_num_mpts, arbel->limits.mpt_entry_size ); + + /* RDMA something or other */ + log_num_rdbs = fls ( arbel->limits.reserved_rdbs - 1 ); + MLX_FILL_1 ( &init_hca, 37, + qpc_eec_cqc_eqc_rdb_parameters.rdb_base_addr_l, + icm_offset ); + icm_offset += icm_usage ( log_num_rdbs, 32 ); + + /* Event queue contexts */ + log_num_eqs = 6; + MLX_FILL_2 ( &init_hca, 33, + qpc_eec_cqc_eqc_rdb_parameters.eqc_base_addr_l, + ( icm_offset >> 6 ), + qpc_eec_cqc_eqc_rdb_parameters.log_num_eq, + log_num_eqs ); + icm_offset += ( ( 1 << log_num_eqs ) * arbel->limits.eqc_entry_size ); + + /* Multicast table */ + + return 0; } From f20cd6f721fa5f3b0c639a76a9269c409c611ef7 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 18 Sep 2007 02:05:25 +0100 Subject: [PATCH 69/84] ICM allocation code *should* now be complete. 
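The commands introduced here slot into the usual mem-free bring-up order; the remainder of this patch is not shown above, so the outline below is the expected shape of the allocation path rather than a quotation of it:

    /* Expected sequence (the conventional order for mem-free HCAs):
     *
     *   1. partition the ICM and fill in init_hca     (as in patch 68)
     *   2. SET_ICM_SIZE ( icm_len )  -> firmware reports how many
     *                                   ICM AUX pages it needs
     *   3. umalloc + MAP_ICM_AUX        back the auxiliary area
     *   4. umalloc + MAP_ICM            back the main ICM area
     *   5. INIT_HCA ( &init_hca )       hand the layout to the device
     *
     * Teardown reverses it: UNMAP_ICM, then UNMAP_ICM_AUX, and finally
     * UNMAP_FA when the firmware itself is stopped.
     */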
--- src/drivers/net/mlx_ipoib/arbel.h | 36 +++++++++++++++++++++++------ src/drivers/net/mlx_ipoib/mt25218.c | 36 +++++++++++++++++++++++------ 2 files changed, 58 insertions(+), 14 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index adcab36c..02632f28 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -160,15 +160,37 @@ union arbelprm_mad { struct arbel_dev_limits { /** Number of reserved QPs */ unsigned int reserved_qps; - /** Number of reserved EEs */ - unsigned int reserved_ees; - /** Number of reserved MTTs */ - unsigned int reserved_mtts; - /** Number of reserved CQs */ - unsigned int reserved_cqs; + /** QP context entry size */ + size_t qpc_entry_size; + /** Extended QP context entry size */ + size_t eqpc_entry_size; /** Number of reserved SRQs */ unsigned int reserved_srqs; - /** Number of reserver UARs */ + /** SRQ context entry size */ + size_t srqc_entry_size; + /** Number of reserved EEs */ + unsigned int reserved_ees; + /** EE context entry size */ + size_t eec_entry_size; + /** Extended EE context entry size */ + size_t eeec_entry_size; + /** Number of reserved CQs */ + unsigned int reserved_cqs; + /** CQ context entry size */ + size_t cqc_entry_size; + /** Number of reserved MTTs */ + unsigned int reserved_mtts; + /** MTT entry size */ + size_t mtt_entry_size; + /** Number of reserved MRWs */ + unsigned int reserved_mrws; + /** MPT entry size */ + size_t mpt_entry_size; + /** Number of reserved RDBs */ + unsigned int reserved_rdbs; + /** EQ context entry size */ + size_t eqc_entry_size; + /** Number of reserved UARs */ unsigned int reserved_uars; }; diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index da77a03d..b1e34151 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -1494,14 +1494,27 @@ static int arbel_get_limits ( struct arbel *arbel ) { arbel->limits.reserved_qps = ( 1 << MLX_GET ( &dev_lim, log2_rsvd_qps ) ); - arbel->limits.reserved_ees = - ( 1 << MLX_GET ( &dev_lim, log2_rsvd_ees ) ); - arbel->limits.reserved_mtts = - ( 1 << MLX_GET ( &dev_lim, log2_rsvd_mtts ) ); - arbel->limits.reserved_cqs = - ( 1 << MLX_GET ( &dev_lim, log2_rsvd_cqs ) ); + arbel->limits.qpc_entry_size = MLX_GET ( &dev_lim, qpc_entry_sz ); + arbel->limits.eqpc_entry_size = MLX_GET ( &dev_lim, eqpc_entry_sz ); arbel->limits.reserved_srqs = ( 1 << MLX_GET ( &dev_lim, log2_rsvd_srqs ) ); + arbel->limits.srqc_entry_size = MLX_GET ( &dev_lim, srq_entry_sz ); + arbel->limits.reserved_ees = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_ees ) ); + arbel->limits.eec_entry_size = MLX_GET ( &dev_lim, eec_entry_sz ); + arbel->limits.eeec_entry_size = MLX_GET ( &dev_lim, eeec_entry_sz ); + arbel->limits.reserved_cqs = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_cqs ) ); + arbel->limits.cqc_entry_size = MLX_GET ( &dev_lim, cqc_entry_sz ); + arbel->limits.reserved_mtts = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_mtts ) ); + arbel->limits.mtt_entry_size = MLX_GET ( &dev_lim, mtt_entry_sz ); + arbel->limits.reserved_mrws = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_mrws ) ); + arbel->limits.mpt_entry_size = MLX_GET ( &dev_lim, mpt_entry_sz ); + arbel->limits.reserved_rdbs = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_rdbs ) ); + arbel->limits.eqc_entry_size = MLX_GET ( &dev_lim, eqc_entry_sz ); arbel->limits.reserved_uars = MLX_GET ( &dev_lim, num_rsvd_uars ); return 0; @@ -1616,7 +1629,16 @@ static int arbel_alloc_icm ( struct arbel *arbel ) { icm_offset += ( ( 1 << 
log_num_eqs ) * arbel->limits.eqc_entry_size ); /* Multicast table */ - + MLX_FILL_1 ( &init_hca, 49, + multicast_parameters.mc_base_addr_l, icm_offset ); + MLX_FILL_1 ( &init_hca, 52, + multicast_parameters.log_mc_table_entry_sz, + fls ( sizeof ( struct arbelprm_mgm_entry ) - 1 ) ); + MLX_FILL_1 ( &init_hca, 53, + multicast_parameters.mc_table_hash_sz, 8 ); + MLX_FILL_1 ( &init_hca, 54, + multicast_parameters.log_mc_table_sz, 3 ); + icm_offset += ( 8 * sizeof ( struct arbelprm_mgm_entry ) ); return 0; } From 4ea6223f5a54712a4410fabe1d4ccbd4d6ec0ab1 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 18 Sep 2007 02:57:49 +0100 Subject: [PATCH 70/84] ICM allocation code in place; about to start test-and-compare runs. --- src/drivers/net/mlx_ipoib/arbel.h | 20 ++- src/drivers/net/mlx_ipoib/mt25218.c | 198 +++++++++++++++++++++++----- 2 files changed, 187 insertions(+), 31 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index 02632f28..79e19a91 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -50,6 +50,11 @@ #define ARBEL_HCR_WRITE_MGM 0x0026 #define ARBEL_HCR_MGID_HASH 0x0027 #define ARBEL_HCR_RUN_FW 0x0ff6 +#define ARBEL_HCR_UNMAP_ICM 0x0ff9 +#define ARBEL_HCR_MAP_ICM 0x0ffa +#define ARBEL_HCR_UNMAP_ICM_AUX 0x0ffb +#define ARBEL_HCR_MAP_ICM_AUX 0x0ffc +#define ARBEL_HCR_SET_ICM_SIZE 0x0ffd #define ARBEL_HCR_UNMAP_FA 0x0ffe #define ARBEL_HCR_MAP_FA 0x0fff @@ -70,7 +75,13 @@ struct arbelprm_mgm_hash_st { /* -------------- */ pseudo_bit_t hash[0x00010]; pseudo_bit_t reserved1[0x00010]; -}; +} __attribute__ (( packed )); + +struct arbelprm_scalar_parameter_st { + pseudo_bit_t reserved0[0x00020]; +/* -------------- */ + pseudo_bit_t value[0x00020]; +} __attribute__ (( packed )); /* * Wrapper structures for hardware datatypes @@ -93,6 +104,7 @@ struct MLX_DECLARE_STRUCT ( arbelprm_query_dev_lim ); struct MLX_DECLARE_STRUCT ( arbelprm_query_fw ); struct MLX_DECLARE_STRUCT ( arbelprm_queue_pair_ee_context_entry ); struct MLX_DECLARE_STRUCT ( arbelprm_recv_wqe_segment_next ); +struct MLX_DECLARE_STRUCT ( arbelprm_scalar_parameter ); struct MLX_DECLARE_STRUCT ( arbelprm_send_doorbell ); struct MLX_DECLARE_STRUCT ( arbelprm_ud_address_vector ); struct MLX_DECLARE_STRUCT ( arbelprm_virtual_physical_mapping ); @@ -289,6 +301,12 @@ struct arbel { /** Firmware area in external memory */ userptr_t firmware_area; + /** ICM size */ + size_t icm_len; + /** ICM AUX size */ + size_t icm_aux_len; + /** ICM area */ + userptr_t icm; /** Doorbell records */ union arbelprm_doorbell_record *db_rec; diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index b1e34151..69f24054 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -331,6 +331,49 @@ arbel_cmd_run_fw ( struct arbel *arbel ) { 0, NULL, 0, NULL ); } +static inline int +arbel_cmd_unmap_icm ( struct arbel *arbel, unsigned int page_count ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_UNMAP_ICM ), + 0, NULL, page_count, NULL ); +} + +static inline int +arbel_cmd_map_icm ( struct arbel *arbel, + const struct arbelprm_virtual_physical_mapping *map ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_MAP_ICM, + 1, sizeof ( *map ) ), + 0, map, 1, NULL ); +} + +static inline int +arbel_cmd_unmap_icm_aux ( struct arbel *arbel ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_UNMAP_ICM_AUX ), + 0, NULL, 0, NULL ); +} + +static inline int +arbel_cmd_map_icm_aux ( 
struct arbel *arbel, + const struct arbelprm_virtual_physical_mapping *map ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_MAP_ICM_AUX, + 1, sizeof ( *map ) ), + 0, map, 1, NULL ); +} + +static inline int +arbel_cmd_set_icm_size ( struct arbel *arbel, + const struct arbelprm_scalar_parameter *icm_size, + struct arbelprm_scalar_parameter *icm_aux_size ) { + return arbel_cmd ( arbel, + ARBEL_HCR_INOUT_CMD ( ARBEL_HCR_SET_ICM_SIZE, + 0, sizeof ( *icm_size ), + 0, sizeof ( *icm_aux_size ) ), + 0, icm_size, 0, icm_aux_size ); +} + static inline int arbel_cmd_unmap_fa ( struct arbel *arbel ) { return arbel_cmd ( arbel, @@ -340,11 +383,11 @@ arbel_cmd_unmap_fa ( struct arbel *arbel ) { static inline int arbel_cmd_map_fa ( struct arbel *arbel, - const struct arbelprm_virtual_physical_mapping *map_fa ) { + const struct arbelprm_virtual_physical_mapping *map ) { return arbel_cmd ( arbel, ARBEL_HCR_IN_CMD ( ARBEL_HCR_MAP_FA, - 1, sizeof ( *map_fa ) ), - 0, map_fa, 1, NULL ); + 1, sizeof ( *map ) ), + 0, map, 1, NULL ); } /*************************************************************************** @@ -1536,23 +1579,28 @@ static size_t icm_usage ( unsigned int log_num_entries, size_t entry_size ) { } /** - * Partition ICM + * Allocate ICM * * @v arbel Arbel device + * @v init_hca INIT_HCA structure to fill in * @ret rc Return status code */ -static int arbel_alloc_icm ( struct arbel *arbel ) { - struct arbelprm_init_hca init_hca; +static int arbel_alloc_icm ( struct arbel *arbel, + struct arbelprm_init_hca *init_hca ) { + struct arbelprm_scalar_parameter icm_size; + struct arbelprm_scalar_parameter icm_aux_size; + struct arbelprm_virtual_physical_mapping map_icm_aux; + struct arbelprm_virtual_physical_mapping map_icm; size_t icm_offset = 0; unsigned int log_num_qps, log_num_srqs, log_num_ees, log_num_cqs; unsigned int log_num_mtts, log_num_mpts, log_num_rdbs, log_num_eqs; + int rc; - memset ( &init_hca, 0, sizeof ( init_hca ) ); icm_offset = ( ( arbel->limits.reserved_uars + 1 ) << 12 ); /* Queue pair contexts */ log_num_qps = fls ( arbel->limits.reserved_qps + ARBEL_MAX_QPS - 1 ); - MLX_FILL_2 ( &init_hca, 13, + MLX_FILL_2 ( init_hca, 13, qpc_eec_cqc_eqc_rdb_parameters.qpc_base_addr_l, ( icm_offset >> 7 ), qpc_eec_cqc_eqc_rdb_parameters.log_num_of_qp, @@ -1560,14 +1608,14 @@ static int arbel_alloc_icm ( struct arbel *arbel ) { icm_offset += icm_usage ( log_num_qps, arbel->limits.qpc_entry_size ); /* Extended queue pair contexts */ - MLX_FILL_1 ( &init_hca, 25, + MLX_FILL_1 ( init_hca, 25, qpc_eec_cqc_eqc_rdb_parameters.eqpc_base_addr_l, icm_offset ); icm_offset += icm_usage ( log_num_qps, arbel->limits.eqpc_entry_size ); /* Shared receive queue contexts */ log_num_srqs = fls ( arbel->limits.reserved_srqs - 1 ); - MLX_FILL_2 ( &init_hca, 19, + MLX_FILL_2 ( init_hca, 19, qpc_eec_cqc_eqc_rdb_parameters.srqc_base_addr_l, ( icm_offset >> 5 ), qpc_eec_cqc_eqc_rdb_parameters.log_num_of_srq, @@ -1576,7 +1624,7 @@ static int arbel_alloc_icm ( struct arbel *arbel ) { /* End-to-end contexts */ log_num_ees = fls ( arbel->limits.reserved_ees - 1 ); - MLX_FILL_2 ( &init_hca, 17, + MLX_FILL_2 ( init_hca, 17, qpc_eec_cqc_eqc_rdb_parameters.eec_base_addr_l, ( icm_offset >> 7 ), qpc_eec_cqc_eqc_rdb_parameters.log_num_of_ee, @@ -1584,14 +1632,14 @@ static int arbel_alloc_icm ( struct arbel *arbel ) { icm_offset += icm_usage ( log_num_ees, arbel->limits.eec_entry_size ); /* Extended end-to-end contexts */ - MLX_FILL_1 ( &init_hca, 29, + MLX_FILL_1 ( init_hca, 29, 
qpc_eec_cqc_eqc_rdb_parameters.eeec_base_addr_l, icm_offset ); icm_offset += icm_usage ( log_num_ees, arbel->limits.eeec_entry_size ); /* Completion queue contexts */ log_num_cqs = fls ( arbel->limits.reserved_cqs + ARBEL_MAX_CQS - 1 ); - MLX_FILL_2 ( &init_hca, 21, + MLX_FILL_2 ( init_hca, 21, qpc_eec_cqc_eqc_rdb_parameters.cqc_base_addr_l, ( icm_offset >> 6 ), qpc_eec_cqc_eqc_rdb_parameters.log_num_of_cq, @@ -1600,28 +1648,28 @@ static int arbel_alloc_icm ( struct arbel *arbel ) { /* Memory translation table */ log_num_mtts = fls ( arbel->limits.reserved_mtts - 1 ); - MLX_FILL_1 ( &init_hca, 65, + MLX_FILL_1 ( init_hca, 65, tpt_parameters.mtt_base_addr_l, icm_offset ); icm_offset += icm_usage ( log_num_mtts, arbel->limits.mtt_entry_size ); /* Memory protection table */ log_num_mpts = fls ( arbel->limits.reserved_mrws - 1 ); - MLX_FILL_1 ( &init_hca, 61, + MLX_FILL_1 ( init_hca, 61, tpt_parameters.mpt_base_adr_l, icm_offset ); - MLX_FILL_1 ( &init_hca, 62, + MLX_FILL_1 ( init_hca, 62, tpt_parameters.log_mpt_sz, log_num_mpts ); icm_offset += icm_usage ( log_num_mpts, arbel->limits.mpt_entry_size ); /* RDMA something or other */ log_num_rdbs = fls ( arbel->limits.reserved_rdbs - 1 ); - MLX_FILL_1 ( &init_hca, 37, + MLX_FILL_1 ( init_hca, 37, qpc_eec_cqc_eqc_rdb_parameters.rdb_base_addr_l, icm_offset ); icm_offset += icm_usage ( log_num_rdbs, 32 ); /* Event queue contexts */ log_num_eqs = 6; - MLX_FILL_2 ( &init_hca, 33, + MLX_FILL_2 ( init_hca, 33, qpc_eec_cqc_eqc_rdb_parameters.eqc_base_addr_l, ( icm_offset >> 6 ), qpc_eec_cqc_eqc_rdb_parameters.log_num_eq, @@ -1629,18 +1677,87 @@ static int arbel_alloc_icm ( struct arbel *arbel ) { icm_offset += ( ( 1 << log_num_eqs ) * arbel->limits.eqc_entry_size ); /* Multicast table */ - MLX_FILL_1 ( &init_hca, 49, + MLX_FILL_1 ( init_hca, 49, multicast_parameters.mc_base_addr_l, icm_offset ); - MLX_FILL_1 ( &init_hca, 52, + MLX_FILL_1 ( init_hca, 52, multicast_parameters.log_mc_table_entry_sz, fls ( sizeof ( struct arbelprm_mgm_entry ) - 1 ) ); - MLX_FILL_1 ( &init_hca, 53, + MLX_FILL_1 ( init_hca, 53, multicast_parameters.mc_table_hash_sz, 8 ); - MLX_FILL_1 ( &init_hca, 54, + MLX_FILL_1 ( init_hca, 54, multicast_parameters.log_mc_table_sz, 3 ); icm_offset += ( 8 * sizeof ( struct arbelprm_mgm_entry ) ); + arbel->icm_len = icm_offset; + arbel->icm_len = ( ( arbel->icm_len + 4095 ) & ~4095 ); + + /* Get ICM auxiliary area size */ + memset ( &icm_size, 0, sizeof ( icm_size ) ); + MLX_FILL_1 ( &icm_size, 1, value, arbel->icm_len ); + if ( ( rc = arbel_cmd_set_icm_size ( arbel, &icm_size, + &icm_aux_size ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not set ICM size: %s\n", + arbel, strerror ( rc ) ); + goto err_set_icm_size; + } + arbel->icm_aux_len = MLX_GET ( &icm_aux_size, value ); + + /* Allocate ICM data and auxiliary area */ + arbel->icm_aux_len = ( ( arbel->icm_aux_len + 4095 ) & ~4095 ); + DBGC ( arbel, "Arbel %p requires %zd kB ICM and %zd kB AUX ICM\n", + arbel, ( arbel->icm_len / 1024 ), + ( arbel->icm_aux_len / 1024 ) ); + arbel->icm = umalloc ( arbel->icm_len + arbel->icm_aux_len ); + if ( ! 
arbel->icm ) { + rc = -ENOMEM; + goto err_alloc; + } + + /* Map ICM auxiliary area */ + memset ( &map_icm_aux, 0, sizeof ( map_icm_aux ) ); + MLX_FILL_2 ( &map_icm_aux, 3, + log2size, fls ( ( arbel->icm_aux_len / 4096 ) - 1 ), + pa_l, user_to_phys ( arbel->icm, arbel->icm_len ) ); + if ( ( rc = arbel_cmd_map_icm_aux ( arbel, &map_icm_aux ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not map AUX ICM: %s\n", + arbel, strerror ( rc ) ); + goto err_map_icm_aux; + } + + /* MAP ICM area */ + memset ( &map_icm, 0, sizeof ( map_icm ) ); + MLX_FILL_2 ( &map_icm, 3, + log2size, fls ( ( arbel->icm_len / 4096 ) - 1 ), + pa_l, user_to_phys ( arbel->icm, 0 ) ); + if ( ( rc = arbel_cmd_map_icm ( arbel, &map_icm ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not map ICM: %s\n", + arbel, strerror ( rc ) ); + goto err_map_icm; + } + return 0; + + arbel_cmd_unmap_icm ( arbel, ( arbel->icm_len / 4096 ) ); + err_map_icm: + arbel_cmd_unmap_icm_aux ( arbel ); + err_map_icm_aux: + ufree ( arbel->icm ); + arbel->icm = UNULL; + err_alloc: + err_set_icm_size: + return rc; +} + +/** + * Free ICM + * + * @v arbel Arbel device + */ +static void arbel_free_icm ( struct arbel *arbel ) { + arbel_cmd_unmap_icm ( arbel, ( arbel->icm_len / 4096 ) ); + arbel_cmd_unmap_icm_aux ( arbel ); + ufree ( arbel->icm ); + arbel->icm = UNULL; } /*************************************************************************** @@ -1661,6 +1778,7 @@ static int arbel_probe ( struct pci_device *pci, const struct pci_device_id *id __unused ) { struct ib_device *ibdev; struct arbel *arbel; + struct arbelprm_init_hca init_hca; int rc; /* Allocate Infiniband device */ @@ -1697,23 +1815,41 @@ static int arbel_probe ( struct pci_device *pci, goto err_mailbox_out; } +#define SELF_INIT 0 + +#if SELF_INIT /* Start firmware */ if ( ( rc = arbel_start_firmware ( arbel ) ) != 0 ) goto err_start_firmware; +#else + /* Initialise hardware */ + udqp_t qph; + if ( ( rc = ib_driver_init ( pci, &qph ) ) != 0 ) + goto err_ib_driver_init; +#endif /* Get device limits */ if ( ( rc = arbel_get_limits ( arbel ) ) != 0 ) goto err_get_limits; - while ( 1 ) {} +#if SELF_INIT + /* Allocate ICM */ + memset ( &init_hca, 0, sizeof ( init_hca ) ); + if ( ( rc = arbel_alloc_icm ( arbel, &init_hca ) ) != 0 ) + goto err_alloc_icm; + + /* Initialise HCA */ + if ( ( rc = arbel_cmd_init_hca ( arbel, &init_hca ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not initialise HCA: %s\n", + arbel, strerror ( rc ) ); + goto err_init_hca; + } +#endif + -#if 0 - /* Initialise hardware */ - if ( ( rc = ib_driver_init ( pci, &qph ) ) != 0 ) - goto err_ib_driver_init; /* Hack up IB structures */ - +#if 0 arbel->config = memfree_pci_dev.cr_space; arbel->uar = memfree_pci_dev.uar; arbel->mailbox_in = dev_buffers_p->inprm_buf; @@ -1728,7 +1864,6 @@ static int arbel_probe ( struct pci_device *pci, if ( ( rc = arbel_get_mad_params ( ibdev ) ) != 0 ) goto err_get_mad_params; - /* Add IPoIB device */ if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) { DBGC ( arbel, "Arbel %p could not add IPoIB device: %s\n", @@ -1743,7 +1878,10 @@ static int arbel_probe ( struct pci_device *pci, ib_driver_close ( 0 ); err_ib_driver_init: - + + err_init_hca: + arbel_free_icm ( arbel ); + err_alloc_icm: err_get_limits: arbel_stop_firmware ( arbel ); err_start_firmware: From 57bf8d9905baead5485055b0d3717b9a252c8feb Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 18 Sep 2007 03:58:59 +0100 Subject: [PATCH 71/84] Seems to get through the ICM mapping process --- src/drivers/net/mlx_ipoib/arbel.h | 3 ++ 
src/drivers/net/mlx_ipoib/cmdif_comm.c | 27 +++++++--------- src/drivers/net/mlx_ipoib/ib_mt25218.c | 13 ++++++++ src/drivers/net/mlx_ipoib/mt25218.c | 45 ++++++++++++++++++++++---- 4 files changed, 67 insertions(+), 21 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index 79e19a91..243aaa47 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -50,6 +50,8 @@ #define ARBEL_HCR_WRITE_MGM 0x0026 #define ARBEL_HCR_MGID_HASH 0x0027 #define ARBEL_HCR_RUN_FW 0x0ff6 +#define ARBEL_HCR_DISABLE_LAM 0x0ff7 +#define ARBEL_HCR_ENABLE_LAM 0x0ff8 #define ARBEL_HCR_UNMAP_ICM 0x0ff9 #define ARBEL_HCR_MAP_ICM 0x0ffa #define ARBEL_HCR_UNMAP_ICM_AUX 0x0ffb @@ -88,6 +90,7 @@ struct arbelprm_scalar_parameter_st { * */ +struct MLX_DECLARE_STRUCT ( arbelprm_access_lam ); struct MLX_DECLARE_STRUCT ( arbelprm_completion_queue_context ); struct MLX_DECLARE_STRUCT ( arbelprm_completion_queue_entry ); struct MLX_DECLARE_STRUCT ( arbelprm_completion_with_error ); diff --git a/src/drivers/net/mlx_ipoib/cmdif_comm.c b/src/drivers/net/mlx_ipoib/cmdif_comm.c index 97f288bc..363e4209 100644 --- a/src/drivers/net/mlx_ipoib/cmdif_comm.c +++ b/src/drivers/net/mlx_ipoib/cmdif_comm.c @@ -134,12 +134,10 @@ static XHH_cmd_status_t cmd_invoke(command_fields_t * cmd_prms) DBG_HD ( &hcr[0], sizeof ( hcr ) ); if ( cmd_prms->in_trans == TRANS_MAILBOX ) { size_t size = ( 4 * cmd_prms->in_param_size ); - if ( size > 256 ) - size = 256; -#if ! CREATE_OWN - DBG ( "Input mailbox:\n" ); - DBG_HD ( &cmd_prms->in_param[0], size ); -#endif + if ( size > 512 ) + size = 512; + DBG2 ( "Input mailbox:\n" ); + DBG2_HD ( &cmd_prms->in_param[0], size ); } for (i = 0; i < 7; ++i) { @@ -172,6 +170,14 @@ static XHH_cmd_status_t cmd_invoke(command_fields_t * cmd_prms) return status; } + if ( cmd_prms->out_trans == TRANS_MAILBOX ) { + size_t size = ( 4 * cmd_prms->out_param_size ); + if ( size > 512 ) + size = 512; + DBG2 ( "Output mailbox:\n" ); + DBG2_HD ( &cmd_prms->out_param[0], size ); + } + if (cmd_prms->out_trans == TRANS_MAILBOX) be_to_cpu_buf(cmd_prms->out_param, cmd_prms->out_param_size); else if (cmd_prms->out_trans == TRANS_IMMEDIATE) { @@ -181,15 +187,6 @@ static XHH_cmd_status_t cmd_invoke(command_fields_t * cmd_prms) return -1; } - if ( cmd_prms->out_trans == TRANS_MAILBOX ) { - size_t size = ( 4 * cmd_prms->out_param_size ); - if ( size > 256 ) - size = 256; -#if ! 
CREATE_OWN - DBG ( "Output mailbox:\n" ); - DBG_HD ( &cmd_prms->out_param[0], size ); -#endif - } DBG ( "Command executed successfully\n" ); return 0; diff --git a/src/drivers/net/mlx_ipoib/ib_mt25218.c b/src/drivers/net/mlx_ipoib/ib_mt25218.c index a5d251d4..caf924a3 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt25218.c +++ b/src/drivers/net/mlx_ipoib/ib_mt25218.c @@ -758,17 +758,20 @@ static int setup_hca(__u8 port, void **eq_p) tmp = get_req_icm_pages(dev_lim.log2_rsvd_qps, MAX_APP_QPS, dev_lim.qpc_entry_sz, &log2_entries); + DBG ( "qpc_base_addr_l = %lx\n", icm_start ); init_hca.qpc_base_addr_l = icm_start; init_hca.log_num_of_qp = log2_entries; icm_start += (tmp << 12); icm_size += (tmp << 12); + DBG ( "eqpc_base_addr_l = %lx\n", icm_start ); init_hca.eqpc_base_addr_l = icm_start; icm_start += (tmp << 12); icm_size += (tmp << 12); tmp = get_req_icm_pages(dev_lim.log2_rsvd_srqs, 0, dev_lim.srq_entry_sz, &log2_entries); + DBG ( "srqc_base_addr_l = %lx\n", icm_start ); init_hca.srqc_base_addr_l = icm_start; init_hca.log_num_of_srq = log2_entries; icm_start += (tmp << 12); @@ -776,15 +779,18 @@ static int setup_hca(__u8 port, void **eq_p) tmp = get_req_icm_pages(dev_lim.log2_rsvd_ees, 0, dev_lim.eec_entry_sz, &log2_entries); + DBG ( "eec_base_addr_l = %lx\n", icm_start ); init_hca.eec_base_addr_l = icm_start; init_hca.log_num_of_ee = log2_entries; icm_start += (tmp << 12); icm_size += (tmp << 12); + DBG ( "eeec_base_addr_l = %lx\n", icm_start ); init_hca.eeec_base_addr_l = icm_start; icm_start += (tmp << 12); icm_size += (tmp << 12); + DBG ( "cqc_base_addr_l = %lx\n", icm_start ); tmp = get_req_icm_pages(dev_lim.log2_rsvd_cqs, MAX_APP_CQS, dev_lim.cqc_entry_sz, &log2_entries); @@ -795,12 +801,14 @@ static int setup_hca(__u8 port, void **eq_p) tmp = get_req_icm_pages(dev_lim.log2_rsvd_mtts, 0, dev_lim.mtt_entry_sz, &log2_entries); + DBG ( "mtt_base_addr_l = %lx\n", icm_start ); init_hca.mtt_base_addr_l = icm_start; icm_start += (tmp << 12); icm_size += (tmp << 12); tmp = get_req_icm_pages(dev_lim.log2_rsvd_mrws, 1, dev_lim.mpt_entry_sz, &log2_entries); + DBG ( "mpt_base_addr_l = %lx\n", icm_start ); init_hca.mpt_base_addr_l = icm_start; init_hca.log_mpt_sz = log2_entries; icm_start += (tmp << 12); @@ -808,16 +816,19 @@ static int setup_hca(__u8 port, void **eq_p) tmp = get_req_icm_pages(dev_lim.log2_rsvd_rdbs, 1, 32, /* size of rdb entry */ &log2_entries); + DBG ( "rdb_base_addr_l = %lx\n", icm_start ); init_hca.rdb_base_addr_l = icm_start; icm_start += (tmp << 12); icm_size += (tmp << 12); + DBG ( "eqc_base_addr_l = %lx\n", icm_start ); init_hca.eqc_base_addr_l = icm_start; init_hca.log_num_of_eq = LOG2_EQS; tmp = dev_lim.eqc_entry_sz * (1 << LOG2_EQS); icm_start += tmp; icm_size += tmp; + DBG ( "mc_base_addr_l = %lx\n", icm_start ); init_hca.mc_base_addr_l = icm_start; init_hca.log_mc_table_entry_sz = my_log2(MT_STRUCT_SIZE(arbelprm_mgm_entry_st)); @@ -828,6 +839,8 @@ static int setup_hca(__u8 port, void **eq_p) icm_start += (MT_STRUCT_SIZE(arbelprm_mgm_entry_st) * init_hca.mc_table_hash_sz); + DBG ( "icm_size = %lx\n", icm_size ); + rc = cmd_set_icm_size(icm_size, &aux_pages); if (rc) { ret = -1; diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 69f24054..1f343d62 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -331,6 +331,21 @@ arbel_cmd_run_fw ( struct arbel *arbel ) { 0, NULL, 0, NULL ); } +static inline int +arbel_cmd_disable_lam ( struct arbel *arbel ) { + return arbel_cmd ( arbel, + 
ARBEL_HCR_VOID_CMD ( ARBEL_HCR_DISABLE_LAM ), + 0, NULL, 0, NULL ); +} + +static inline int +arbel_cmd_enable_lam ( struct arbel *arbel, struct arbelprm_access_lam *lam ) { + return arbel_cmd ( arbel, + ARBEL_HCR_OUT_CMD ( ARBEL_HCR_ENABLE_LAM, + 1, sizeof ( *lam ) ), + 1, NULL, 0, lam ); +} + static inline int arbel_cmd_unmap_icm ( struct arbel *arbel, unsigned int page_count ) { return arbel_cmd ( arbel, @@ -1431,6 +1446,7 @@ static int arbel_get_mad_params ( struct ib_device *ibdev ) { */ static int arbel_start_firmware ( struct arbel *arbel ) { struct arbelprm_query_fw fw; + struct arbelprm_access_lam lam; struct arbelprm_virtual_physical_mapping map_fa; unsigned int fw_pages; unsigned int log2_fw_pages; @@ -1453,6 +1469,11 @@ static int arbel_start_firmware ( struct arbel *arbel ) { DBGC ( arbel, "Arbel %p requires %d kB for firmware\n", arbel, ( fw_pages * 4 ) ); + /* Enable locally-attached memory. Ignore failure; there may + * be no attached memory. + */ + arbel_cmd_enable_lam ( arbel, &lam ); + /* Allocate firmware pages and map firmware area */ fw_size = ( fw_pages * 4096 ); arbel->firmware_area = umalloc ( fw_size ); @@ -1605,13 +1626,16 @@ static int arbel_alloc_icm ( struct arbel *arbel, ( icm_offset >> 7 ), qpc_eec_cqc_eqc_rdb_parameters.log_num_of_qp, log_num_qps ); + DBGC ( arbel, "Arbel %p ICM QPC base = %zx\n", arbel, icm_offset ); icm_offset += icm_usage ( log_num_qps, arbel->limits.qpc_entry_size ); /* Extended queue pair contexts */ MLX_FILL_1 ( init_hca, 25, qpc_eec_cqc_eqc_rdb_parameters.eqpc_base_addr_l, icm_offset ); - icm_offset += icm_usage ( log_num_qps, arbel->limits.eqpc_entry_size ); + DBGC ( arbel, "Arbel %p ICM EQPC base = %zx\n", arbel, icm_offset ); + // icm_offset += icm_usage ( log_num_qps, arbel->limits.eqpc_entry_size ); + icm_offset += icm_usage ( log_num_qps, arbel->limits.qpc_entry_size ); /* Shared receive queue contexts */ log_num_srqs = fls ( arbel->limits.reserved_srqs - 1 ); @@ -1620,6 +1644,7 @@ static int arbel_alloc_icm ( struct arbel *arbel, ( icm_offset >> 5 ), qpc_eec_cqc_eqc_rdb_parameters.log_num_of_srq, log_num_srqs ); + DBGC ( arbel, "Arbel %p ICM SRQC base = %zx\n", arbel, icm_offset ); icm_offset += icm_usage ( log_num_srqs, arbel->limits.srqc_entry_size ); /* End-to-end contexts */ @@ -1629,12 +1654,14 @@ static int arbel_alloc_icm ( struct arbel *arbel, ( icm_offset >> 7 ), qpc_eec_cqc_eqc_rdb_parameters.log_num_of_ee, log_num_ees ); + DBGC ( arbel, "Arbel %p ICM EEC base = %zx\n", arbel, icm_offset ); icm_offset += icm_usage ( log_num_ees, arbel->limits.eec_entry_size ); /* Extended end-to-end contexts */ MLX_FILL_1 ( init_hca, 29, qpc_eec_cqc_eqc_rdb_parameters.eeec_base_addr_l, icm_offset ); + DBGC ( arbel, "Arbel %p ICM EEEC base = %zx\n", arbel, icm_offset ); icm_offset += icm_usage ( log_num_ees, arbel->limits.eeec_entry_size ); /* Completion queue contexts */ @@ -1644,12 +1671,14 @@ static int arbel_alloc_icm ( struct arbel *arbel, ( icm_offset >> 6 ), qpc_eec_cqc_eqc_rdb_parameters.log_num_of_cq, log_num_cqs ); + DBGC ( arbel, "Arbel %p ICM CQC base = %zx\n", arbel, icm_offset ); icm_offset += icm_usage ( log_num_cqs, arbel->limits.cqc_entry_size ); /* Memory translation table */ log_num_mtts = fls ( arbel->limits.reserved_mtts - 1 ); MLX_FILL_1 ( init_hca, 65, tpt_parameters.mtt_base_addr_l, icm_offset ); + DBGC ( arbel, "Arbel %p ICM MTT base = %zx\n", arbel, icm_offset ); icm_offset += icm_usage ( log_num_mtts, arbel->limits.mtt_entry_size ); /* Memory protection table */ @@ -1658,6 +1687,7 @@ static int 
arbel_alloc_icm ( struct arbel *arbel, tpt_parameters.mpt_base_adr_l, icm_offset ); MLX_FILL_1 ( init_hca, 62, tpt_parameters.log_mpt_sz, log_num_mpts ); + DBGC ( arbel, "Arbel %p ICM MTT base = %zx\n", arbel, icm_offset ); icm_offset += icm_usage ( log_num_mpts, arbel->limits.mpt_entry_size ); /* RDMA something or other */ @@ -1665,6 +1695,7 @@ static int arbel_alloc_icm ( struct arbel *arbel, MLX_FILL_1 ( init_hca, 37, qpc_eec_cqc_eqc_rdb_parameters.rdb_base_addr_l, icm_offset ); + DBGC ( arbel, "Arbel %p ICM RDB base = %zx\n", arbel, icm_offset ); icm_offset += icm_usage ( log_num_rdbs, 32 ); /* Event queue contexts */ @@ -1674,6 +1705,7 @@ static int arbel_alloc_icm ( struct arbel *arbel, ( icm_offset >> 6 ), qpc_eec_cqc_eqc_rdb_parameters.log_num_eq, log_num_eqs ); + DBGC ( arbel, "Arbel %p ICM EQ base = %zx\n", arbel, icm_offset ); icm_offset += ( ( 1 << log_num_eqs ) * arbel->limits.eqc_entry_size ); /* Multicast table */ @@ -1686,6 +1718,7 @@ static int arbel_alloc_icm ( struct arbel *arbel, multicast_parameters.mc_table_hash_sz, 8 ); MLX_FILL_1 ( init_hca, 54, multicast_parameters.log_mc_table_sz, 3 ); + DBGC ( arbel, "Arbel %p ICM MC base = %zx\n", arbel, icm_offset ); icm_offset += ( 8 * sizeof ( struct arbelprm_mgm_entry ) ); arbel->icm_len = icm_offset; @@ -1700,10 +1733,9 @@ static int arbel_alloc_icm ( struct arbel *arbel, arbel, strerror ( rc ) ); goto err_set_icm_size; } - arbel->icm_aux_len = MLX_GET ( &icm_aux_size, value ); + arbel->icm_aux_len = ( MLX_GET ( &icm_aux_size, value ) * 4096 ); /* Allocate ICM data and auxiliary area */ - arbel->icm_aux_len = ( ( arbel->icm_aux_len + 4095 ) & ~4095 ); DBGC ( arbel, "Arbel %p requires %zd kB ICM and %zd kB AUX ICM\n", arbel, ( arbel->icm_len / 1024 ), ( arbel->icm_aux_len / 1024 ) ); @@ -1717,7 +1749,8 @@ static int arbel_alloc_icm ( struct arbel *arbel, memset ( &map_icm_aux, 0, sizeof ( map_icm_aux ) ); MLX_FILL_2 ( &map_icm_aux, 3, log2size, fls ( ( arbel->icm_aux_len / 4096 ) - 1 ), - pa_l, user_to_phys ( arbel->icm, arbel->icm_len ) ); + pa_l, + ( user_to_phys ( arbel->icm, arbel->icm_len ) >> 12 ) ); if ( ( rc = arbel_cmd_map_icm_aux ( arbel, &map_icm_aux ) ) != 0 ) { DBGC ( arbel, "Arbel %p could not map AUX ICM: %s\n", arbel, strerror ( rc ) ); @@ -1728,7 +1761,7 @@ static int arbel_alloc_icm ( struct arbel *arbel, memset ( &map_icm, 0, sizeof ( map_icm ) ); MLX_FILL_2 ( &map_icm, 3, log2size, fls ( ( arbel->icm_len / 4096 ) - 1 ), - pa_l, user_to_phys ( arbel->icm, 0 ) ); + pa_l, ( user_to_phys ( arbel->icm, 0 ) >> 12 ) ); if ( ( rc = arbel_cmd_map_icm ( arbel, &map_icm ) ) != 0 ) { DBGC ( arbel, "Arbel %p could not map ICM: %s\n", arbel, strerror ( rc ) ); @@ -1815,7 +1848,7 @@ static int arbel_probe ( struct pci_device *pci, goto err_mailbox_out; } -#define SELF_INIT 0 +#define SELF_INIT 1 #if SELF_INIT /* Start firmware */ From da014080f9393c19679a3d3b5e34d06ada5b599e Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 18 Sep 2007 04:20:41 +0100 Subject: [PATCH 72/84] INIT_HCA block now matches. 
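The ICM sizing fixes in the preceding patch boil down to three pieces of page arithmetic: the ICM length is rounded up to a whole number of 4 kB pages, SET_ICM_SIZE reports the auxiliary area size in pages rather than bytes, and MAP_ICM / MAP_ICM_AUX expect pa_l as a page number (physical address shifted right by 12). A minimal standalone sketch of that arithmetic follows; the input values and the helper name are illustrative only, not part of the driver.

/* Standalone sketch of the ICM page arithmetic used above; values
 * and the helper name are illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

#define ICM_PAGE_SIZE 4096UL

/* Round a byte count up to a whole number of 4 kB ICM pages */
static unsigned long icm_page_align ( unsigned long len ) {
	return ( ( len + ICM_PAGE_SIZE - 1 ) & ~( ICM_PAGE_SIZE - 1 ) );
}

int main ( void ) {
	unsigned long icm_len = 123456;		/* hypothetical ICM usage */
	unsigned long aux_pages = 7;		/* hypothetical SET_ICM_SIZE result */
	uint64_t icm_phys = 0x12345000ULL;	/* hypothetical buffer address */

	icm_len = icm_page_align ( icm_len );
	/* SET_ICM_SIZE reports the AUX area in 4 kB pages, not bytes */
	unsigned long icm_aux_len = ( aux_pages * ICM_PAGE_SIZE );
	/* MAP_ICM / MAP_ICM_AUX take pa_l as ( physical address >> 12 ) */
	printf ( "ICM %lu bytes + AUX %lu bytes, mapped at page %#llx\n",
		 icm_len, icm_aux_len,
		 ( unsigned long long ) ( icm_phys >> 12 ) );
	return 0;
}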
--- src/drivers/net/mlx_ipoib/ib_mt25218.c | 1 + src/drivers/net/mlx_ipoib/mt25218.c | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/ib_mt25218.c b/src/drivers/net/mlx_ipoib/ib_mt25218.c index caf924a3..9473af89 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt25218.c +++ b/src/drivers/net/mlx_ipoib/ib_mt25218.c @@ -811,6 +811,7 @@ static int setup_hca(__u8 port, void **eq_p) DBG ( "mpt_base_addr_l = %lx\n", icm_start ); init_hca.mpt_base_addr_l = icm_start; init_hca.log_mpt_sz = log2_entries; + DBG ( "log2_entries for mpt = %d\n", log2_entries ); icm_start += (tmp << 12); icm_size += (tmp << 12); diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 1f343d62..f9a0bb1b 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -169,7 +169,7 @@ static int arbel_cmd ( struct arbel *arbel, unsigned long command, DBGC2_HD ( arbel, &hcr, sizeof ( hcr ) ); if ( in_len ) { DBGC2 ( arbel, "Input:\n" ); - DBGC2_HD ( arbel, in, ( ( in_len < 256 ) ? in_len : 256 ) ); + DBGC2_HD ( arbel, in, ( ( in_len < 512 ) ? in_len : 512 ) ); } /* Issue command */ @@ -203,7 +203,7 @@ static int arbel_cmd ( struct arbel *arbel, unsigned long command, memcpy ( out, out_buffer, out_len ); if ( out_len ) { DBGC2 ( arbel, "Output:\n" ); - DBGC2_HD ( arbel, out, ( ( out_len < 256 ) ? out_len : 256 ) ); + DBGC2_HD ( arbel, out, ( ( out_len < 512 ) ? out_len : 512 ) ); } return 0; @@ -1682,7 +1682,7 @@ static int arbel_alloc_icm ( struct arbel *arbel, icm_offset += icm_usage ( log_num_mtts, arbel->limits.mtt_entry_size ); /* Memory protection table */ - log_num_mpts = fls ( arbel->limits.reserved_mrws - 1 ); + log_num_mpts = fls ( arbel->limits.reserved_mrws + 1 - 1 ); MLX_FILL_1 ( init_hca, 61, tpt_parameters.mpt_base_adr_l, icm_offset ); MLX_FILL_1 ( init_hca, 62, @@ -1872,6 +1872,7 @@ static int arbel_probe ( struct pci_device *pci, goto err_alloc_icm; /* Initialise HCA */ + MLX_FILL_1 ( &init_hca, 74, uar_parameters.log_max_uars, 1 ); if ( ( rc = arbel_cmd_init_hca ( arbel, &init_hca ) ) != 0 ) { DBGC ( arbel, "Arbel %p could not initialise HCA: %s\n", arbel, strerror ( rc ) ); From def5ae9127dab8c559a971e787d44cbaf119804f Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 18 Sep 2007 04:45:55 +0100 Subject: [PATCH 73/84] Added sw2hw_mpt --- src/drivers/net/mlx_ipoib/arbel.h | 6 +++ src/drivers/net/mlx_ipoib/mt25218.c | 75 ++++++++++++++++++++++++++++- 2 files changed, 79 insertions(+), 2 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index 243aaa47..2755182f 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -39,6 +39,8 @@ #define ARBEL_HCR_QUERY_DEV_LIM 0x0003 #define ARBEL_HCR_QUERY_FW 0x0004 #define ARBEL_HCR_INIT_HCA 0x0007 +#define ARBEL_HCR_CLOSE_HCA 0x0008 +#define ARBEL_HCR_SW2HW_MPT 0x000d #define ARBEL_HCR_SW2HW_CQ 0x0016 #define ARBEL_HCR_HW2SW_CQ 0x0017 #define ARBEL_HCR_RST2INIT_QPEE 0x0019 @@ -101,6 +103,7 @@ struct MLX_DECLARE_STRUCT ( arbelprm_init_hca ); struct MLX_DECLARE_STRUCT ( arbelprm_mad_ifc ); struct MLX_DECLARE_STRUCT ( arbelprm_mgm_entry ); struct MLX_DECLARE_STRUCT ( arbelprm_mgm_hash ); +struct MLX_DECLARE_STRUCT ( arbelprm_mpt ); struct MLX_DECLARE_STRUCT ( arbelprm_qp_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_qp_ee_state_transitions ); struct MLX_DECLARE_STRUCT ( arbelprm_query_dev_lim ); @@ -333,6 +336,9 @@ struct arbel { /** Global protection domain */ #define 
ARBEL_GLOBAL_PD 0x123456 +/** Memory key prefix */ +#define ARBEL_MKEY_PREFIX 0x77000000UL + /* * HCA commands * diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index f9a0bb1b..0bd5385f 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -235,6 +235,22 @@ arbel_cmd_init_hca ( struct arbel *arbel, 0, init_hca, 0, NULL ); } +static inline int +arbel_cmd_close_hca ( struct arbel *arbel ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_CLOSE_HCA ), + 0, NULL, 0, NULL ); +} + +static inline int +arbel_cmd_sw2hw_mpt ( struct arbel *arbel, unsigned int index, + const struct arbelprm_mpt *mpt ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_SW2HW_MPT, + 1, sizeof ( *mpt ) ), + 0, mpt, index, NULL ); +} + static inline int arbel_cmd_sw2hw_cq ( struct arbel *arbel, unsigned long cqn, const struct arbelprm_completion_queue_context *cqctx ) { @@ -405,6 +421,16 @@ arbel_cmd_map_fa ( struct arbel *arbel, 0, map, 1, NULL ); } +/*************************************************************************** + * + * Event queue operations + * + *************************************************************************** + */ + +static int arbel_create_eq ( struct arbel *arbel ) { +} + /*************************************************************************** * * Completion queue operations @@ -1800,6 +1826,42 @@ static void arbel_free_icm ( struct arbel *arbel ) { *************************************************************************** */ +/** + * Set up memory protection table + * + * @v arbel Arbel device + * @ret rc Return status code + */ +static int arbel_setup_mpt ( struct arbel *arbel ) { + struct arbelprm_mpt mpt; + uint32_t key; + int rc; + + /* Derive key */ + key = ( arbel->limits.reserved_mrws | ARBEL_MKEY_PREFIX ); + arbel->reserved_lkey = ( ( key << 8 ) | ( key >> 24 ) ); + + /* Initialise memory protection table */ + memset ( &mpt, 0, sizeof ( mpt ) ); + MLX_FILL_4 ( &mpt, 0, + r_w, 1, + pa, 1, + lr, 1, + lw, 1 ); + MLX_FILL_1 ( &mpt, 2, mem_key, key ); + MLX_FILL_1 ( &mpt, 3, pd, ARBEL_GLOBAL_PD ); + MLX_FILL_1 ( &mpt, 6, reg_wnd_len_h, 0xffffffffUL ); + MLX_FILL_1 ( &mpt, 7, reg_wnd_len_l, 0xffffffffUL ); + if ( ( rc = arbel_cmd_sw2hw_mpt ( arbel, arbel->limits.reserved_mrws, + &mpt ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not set up MPT: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + + return 0; +} + /** * Probe PCI device * @@ -1878,6 +1940,12 @@ static int arbel_probe ( struct pci_device *pci, arbel, strerror ( rc ) ); goto err_init_hca; } + + /* Set up memory protection */ + if ( ( rc = arbel_setup_mpt ( arbel ) ) != 0 ) + goto err_setup_mpt; + + #endif @@ -1889,8 +1957,10 @@ static int arbel_probe ( struct pci_device *pci, arbel->mailbox_in = dev_buffers_p->inprm_buf; arbel->mailbox_out = dev_buffers_p->outprm_buf; #endif - arbel->db_rec = dev_ib_data.uar_context_base; +#if ! SELF_INIT arbel->reserved_lkey = dev_ib_data.mkey; +#endif + arbel->db_rec = dev_ib_data.uar_context_base; arbel->eqn = dev_ib_data.eq.eqn; @@ -1912,7 +1982,8 @@ static int arbel_probe ( struct pci_device *pci, ib_driver_close ( 0 ); err_ib_driver_init: - + err_setup_mpt: + arbel_cmd_close_hca ( arbel ); err_init_hca: arbel_free_icm ( arbel ); err_alloc_icm: From 8423b0b109bd611fd3fe39725b14142c26a3d156 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 18 Sep 2007 05:39:34 +0100 Subject: [PATCH 74/84] Gets most of the way through initialisation. 
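The reserved-lkey derivation added by the sw2hw_mpt patch above is a one-byte left rotation of the memory key programmed into the MPT (ARBEL_MKEY_PREFIX ORed with the reserved-MRW index used as the MPT slot). A tiny standalone illustration, with a made-up reserved_mrws value, is:

/* Standalone illustration of the reserved-lkey derivation in
 * arbel_setup_mpt(); the reserved_mrws value here is made up.
 */
#include <stdint.h>
#include <stdio.h>

int main ( void ) {
	uint32_t reserved_mrws = 1;	/* hypothetical QUERY_DEV_LIM result */
	uint32_t key = ( reserved_mrws | 0x77000000UL );	/* ARBEL_MKEY_PREFIX */
	/* Rotate left by one byte: 0x77000001 becomes 0x00000177 */
	uint32_t reserved_lkey = ( ( key << 8 ) | ( key >> 24 ) );

	printf ( "mem_key=%#010x reserved_lkey=%#010x\n", key, reserved_lkey );
	return 0;
}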
--- src/drivers/net/mlx_ipoib/arbel.h | 11 +- src/drivers/net/mlx_ipoib/ib_mt25218.c | 2 +- src/drivers/net/mlx_ipoib/mt25218.c | 164 ++++++++++++++++++++++--- 3 files changed, 156 insertions(+), 21 deletions(-) diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h index 2755182f..dcca5107 100644 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ b/src/drivers/net/mlx_ipoib/arbel.h @@ -40,7 +40,12 @@ #define ARBEL_HCR_QUERY_FW 0x0004 #define ARBEL_HCR_INIT_HCA 0x0007 #define ARBEL_HCR_CLOSE_HCA 0x0008 +#define ARBEL_HCR_INIT_IB 0x0009 +#define ARBEL_HCR_CLOSE_IB 0x000a #define ARBEL_HCR_SW2HW_MPT 0x000d +#define ARBEL_HCR_MAP_EQ 0x0012 +#define ARBEL_HCR_SW2HW_EQ 0x0013 +#define ARBEL_HCR_HW2SW_EQ 0x0014 #define ARBEL_HCR_SW2HW_CQ 0x0016 #define ARBEL_HCR_HW2SW_CQ 0x0017 #define ARBEL_HCR_RST2INIT_QPEE 0x0019 @@ -68,6 +73,8 @@ /* MTUs */ #define ARBEL_MTU_2048 0x04 +#define ARBEL_NO_EQ 64 + #define ARBEL_INVALID_LKEY 0x00000100UL /* @@ -98,8 +105,10 @@ struct MLX_DECLARE_STRUCT ( arbelprm_completion_queue_entry ); struct MLX_DECLARE_STRUCT ( arbelprm_completion_with_error ); struct MLX_DECLARE_STRUCT ( arbelprm_cq_arm_db_record ); struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record ); +struct MLX_DECLARE_STRUCT ( arbelprm_eqc ); struct MLX_DECLARE_STRUCT ( arbelprm_hca_command_register ); struct MLX_DECLARE_STRUCT ( arbelprm_init_hca ); +struct MLX_DECLARE_STRUCT ( arbelprm_init_ib ); struct MLX_DECLARE_STRUCT ( arbelprm_mad_ifc ); struct MLX_DECLARE_STRUCT ( arbelprm_mgm_entry ); struct MLX_DECLARE_STRUCT ( arbelprm_mgm_hash ); @@ -321,8 +330,6 @@ struct arbel { * Used to get unrestricted memory access. */ unsigned long reserved_lkey; - /** Event queue number */ - unsigned long eqn; /** Completion queue in-use bitmask */ arbel_bitmask_t cq_inuse[ ARBEL_BITMASK_SIZE ( ARBEL_MAX_CQS ) ]; diff --git a/src/drivers/net/mlx_ipoib/ib_mt25218.c b/src/drivers/net/mlx_ipoib/ib_mt25218.c index 9473af89..174a2309 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt25218.c +++ b/src/drivers/net/mlx_ipoib/ib_mt25218.c @@ -811,7 +811,7 @@ static int setup_hca(__u8 port, void **eq_p) DBG ( "mpt_base_addr_l = %lx\n", icm_start ); init_hca.mpt_base_addr_l = icm_start; init_hca.log_mpt_sz = log2_entries; - DBG ( "log2_entries for mpt = %d\n", log2_entries ); + DBG ( "log2_entries for mpt = %ld\n", log2_entries ); icm_start += (tmp << 12); icm_size += (tmp << 12); diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 0bd5385f..cf3c9f00 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -30,9 +30,8 @@ Skeleton NIC driver for Etherboot #include "arbel.h" - - - +/* Port to use */ +#define PXE_IB_PORT 1 /*************************************************************************** * @@ -242,6 +241,22 @@ arbel_cmd_close_hca ( struct arbel *arbel ) { 0, NULL, 0, NULL ); } +static inline int +arbel_cmd_init_ib ( struct arbel *arbel, unsigned int port, + const struct arbelprm_init_ib *init_ib ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_INIT_IB, + 1, sizeof ( *init_ib ) ), + 0, init_ib, port, NULL ); +} + +static inline int +arbel_cmd_close_ib ( struct arbel *arbel, unsigned int port ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_CLOSE_IB ), + 0, NULL, port, NULL ); +} + static inline int arbel_cmd_sw2hw_mpt ( struct arbel *arbel, unsigned int index, const struct arbelprm_mpt *mpt ) { @@ -251,6 +266,22 @@ arbel_cmd_sw2hw_mpt ( struct arbel *arbel, unsigned int index, 0, mpt, index, NULL 
); } +static inline int +arbel_cmd_sw2hw_eq ( struct arbel *arbel, unsigned int index, + const struct arbelprm_eqc *eqc ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_SW2HW_EQ, + 1, sizeof ( *eqc ) ), + 0, eqc, index, NULL ); +} + +static inline int +arbel_cmd_hw2sw_eq ( struct arbel *arbel, unsigned int index ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_HW2SW_EQ ), + 1, NULL, index, NULL ); +} + static inline int arbel_cmd_sw2hw_cq ( struct arbel *arbel, unsigned long cqn, const struct arbelprm_completion_queue_context *cqctx ) { @@ -421,16 +452,6 @@ arbel_cmd_map_fa ( struct arbel *arbel, 0, map, 1, NULL ); } -/*************************************************************************** - * - * Event queue operations - * - *************************************************************************** - */ - -static int arbel_create_eq ( struct arbel *arbel ) { -} - /*************************************************************************** * * Completion queue operations @@ -508,7 +529,7 @@ static int arbel_create_cq ( struct ib_device *ibdev, MLX_FILL_2 ( &cqctx, 3, usr_page, arbel->limits.reserved_uars, log_cq_size, fls ( cq->num_cqes - 1 ) ); - MLX_FILL_1 ( &cqctx, 5, c_eqn, arbel->eqn ); + MLX_FILL_1 ( &cqctx, 5, c_eqn, ARBEL_NO_EQ ); MLX_FILL_1 ( &cqctx, 6, pd, ARBEL_GLOBAL_PD ); MLX_FILL_1 ( &cqctx, 7, l_key, arbel->reserved_lkey ); MLX_FILL_1 ( &cqctx, 12, cqn, cq->cqn ); @@ -1423,6 +1444,38 @@ static int arbel_get_pkey ( struct arbel *arbel, unsigned int *pkey ) { return 0; } +/** + * Wait for link up + * + * @v arbel Arbel device + * @ret rc Return status code + * + * This function shouldn't really exist. Unfortunately, IB links take + * a long time to come up, and we can't get various key parameters + * e.g. our own IPoIB MAC address without information from the subnet + * manager). We should eventually make link-up an asynchronous event. + */ +static int arbel_wait_for_link ( struct arbel *arbel ) { + struct ib_mad_port_info port_info; + unsigned int retries; + int rc; + + printf ( "Waiting for Infiniband link-up..." ); + for ( retries = 20 ; retries ; retries-- ) { + if ( ( rc = arbel_get_port_info ( arbel, &port_info ) ) != 0 ) + continue; + if ( ( ( port_info.port_state__link_speed_supported ) & 0xf ) + == 4 ) { + printf ( "ok\n" ); + return 0; + } + printf ( "." 
); + sleep ( 1 ); + } + printf ( "failed\n" ); + return -ENODEV; +}; + /** * Get MAD parameters * @@ -1819,6 +1872,54 @@ static void arbel_free_icm ( struct arbel *arbel ) { arbel->icm = UNULL; } +/*************************************************************************** + * + * Infiniband link-layer operations + * + *************************************************************************** + */ + +/** + * Initialise Infiniband link + * + * @v arbel Arbel device + * @ret rc Return status code + */ +static int arbel_init_ib ( struct arbel *arbel ) { + struct arbelprm_init_ib init_ib; + int rc; + + memset ( &init_ib, 0, sizeof ( init_ib ) ); + MLX_FILL_3 ( &init_ib, 0, + mtu_cap, ARBEL_MTU_2048, + port_width_cap, 3, + vl_cap, 1 ); + MLX_FILL_1 ( &init_ib, 2, max_pkey, 64 ); + if ( ( rc = arbel_cmd_init_ib ( arbel, PXE_IB_PORT, + &init_ib ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not intialise IB: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Close Infiniband link + * + * @v arbel Arbel device + */ +static void arbel_close_ib ( struct arbel *arbel ) { + int rc; + + if ( ( rc = arbel_cmd_close_ib ( arbel, PXE_IB_PORT ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not close IB: %s\n", + arbel, strerror ( rc ) ); + /* Nothing we can do about this */ + } +} + /*************************************************************************** * * PCI interface @@ -1933,6 +2034,16 @@ static int arbel_probe ( struct pci_device *pci, if ( ( rc = arbel_alloc_icm ( arbel, &init_hca ) ) != 0 ) goto err_alloc_icm; + + unsigned long uar_offset = ( arbel->limits.reserved_uars * 4096 ); + arbel->db_rec = phys_to_virt ( user_to_phys ( arbel->icm, + uar_offset ) ); + memset ( arbel->db_rec, 0, 4096 ); + union arbelprm_doorbell_record *db_rec; + db_rec = &arbel->db_rec[ARBEL_GROUP_SEPARATOR_DOORBELL]; + MLX_FILL_1 ( &db_rec->qp, 1, res, ARBEL_UAR_RES_GROUP_SEP ); + + /* Initialise HCA */ MLX_FILL_1 ( &init_hca, 74, uar_parameters.log_max_uars, 1 ); if ( ( rc = arbel_cmd_init_hca ( arbel, &init_hca ) ) != 0 ) { @@ -1945,7 +2056,14 @@ static int arbel_probe ( struct pci_device *pci, if ( ( rc = arbel_setup_mpt ( arbel ) ) != 0 ) goto err_setup_mpt; - + /* Bring up IB layer */ + if ( ( rc = arbel_init_ib ( arbel ) ) != 0 ) + goto err_init_ib; + + /* Wait for link */ + if ( ( rc = arbel_wait_for_link ( arbel ) ) != 0 ) + goto err_wait_for_link; + #endif @@ -1957,17 +2075,24 @@ static int arbel_probe ( struct pci_device *pci, arbel->mailbox_in = dev_buffers_p->inprm_buf; arbel->mailbox_out = dev_buffers_p->outprm_buf; #endif -#if ! 
SELF_INIT +#if SELF_INIT +#else arbel->reserved_lkey = dev_ib_data.mkey; -#endif arbel->db_rec = dev_ib_data.uar_context_base; - arbel->eqn = dev_ib_data.eq.eqn; +#endif + // arbel->eqn = dev_ib_data.eq.eqn; /* Get MAD parameters */ if ( ( rc = arbel_get_mad_params ( ibdev ) ) != 0 ) goto err_get_mad_params; + DBGC ( arbel, "Arbel %p port GID is %08lx:%08lx:%08lx:%08lx\n", arbel, + htonl ( ibdev->port_gid.u.dwords[0] ), + htonl ( ibdev->port_gid.u.dwords[1] ), + htonl ( ibdev->port_gid.u.dwords[2] ), + htonl ( ibdev->port_gid.u.dwords[3] ) ); + /* Add IPoIB device */ if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) { DBGC ( arbel, "Arbel %p could not add IPoIB device: %s\n", @@ -1982,6 +2107,9 @@ static int arbel_probe ( struct pci_device *pci, ib_driver_close ( 0 ); err_ib_driver_init: + err_wait_for_link: + arbel_close_ib ( arbel ); + err_init_ib: err_setup_mpt: arbel_cmd_close_hca ( arbel ); err_init_hca: From a5ec029d24b34de9ad9068b543a7e181fe1c70a3 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 18 Sep 2007 05:46:49 +0100 Subject: [PATCH 75/84] First version that works with SELF_INIT! --- src/drivers/net/mlx_ipoib/mt25218.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index cf3c9f00..338a7db1 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -1894,6 +1894,7 @@ static int arbel_init_ib ( struct arbel *arbel ) { mtu_cap, ARBEL_MTU_2048, port_width_cap, 3, vl_cap, 1 ); + MLX_FILL_1 ( &init_ib, 1, max_gid, 1 ); MLX_FILL_1 ( &init_ib, 2, max_pkey, 64 ); if ( ( rc = arbel_cmd_init_ib ( arbel, PXE_IB_PORT, &init_ib ) ) != 0 ) { From 982e4dd101f9cb24b06dcdd96a2a6c4316d67c7d Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 18 Sep 2007 06:06:55 +0100 Subject: [PATCH 76/84] Separated out to a clean new drivers/infiniband directory. --- src/Makefile | 6 +- src/drivers/infiniband/MT25218_PRM.h | 3460 ++++++++++++++++++++++++++ src/drivers/infiniband/arbel.c | 2130 ++++++++++++++++ src/drivers/infiniband/arbel.h | 459 ++++ src/drivers/infiniband/mlx_bitops.h | 209 ++ 5 files changed, 6259 insertions(+), 5 deletions(-) create mode 100644 src/drivers/infiniband/MT25218_PRM.h create mode 100644 src/drivers/infiniband/arbel.c create mode 100644 src/drivers/infiniband/arbel.h create mode 100644 src/drivers/infiniband/mlx_bitops.h diff --git a/src/Makefile b/src/Makefile index f0e85e1e..5327a520 100644 --- a/src/Makefile +++ b/src/Makefile @@ -152,6 +152,7 @@ SRCDIRS += drivers/scsi SRCDIRS += drivers/ata SRCDIRS += drivers/nvs SRCDIRS += drivers/bitbash +SRCDIRS += drivers/infiniband SRCDIRS += interface/pxe SRCDIRS += tests SRCDIRS += crypto crypto/axtls crypto/matrixssl @@ -165,11 +166,6 @@ SRCDIRS += usr NON_AUTO_SRCS += core/elf_loader.c NON_AUTO_SRCS += drivers/net/prism2.c -SRCS += drivers/net/mlx_ipoib/mt25218.c -SRCS += drivers/net/mlx_ipoib/mt23108.c -CFLAGS_mt25218 = -Wno-error -CFLAGS_mt23108 = -Wno-error - # Rules for finalising files. TGT_MAKEROM_FLAGS is defined as part of # the automatic build system and varies by target; it includes the # "-p 0x1234,0x5678" string to set the PCI IDs. diff --git a/src/drivers/infiniband/MT25218_PRM.h b/src/drivers/infiniband/MT25218_PRM.h new file mode 100644 index 00000000..19ca92cd --- /dev/null +++ b/src/drivers/infiniband/MT25218_PRM.h @@ -0,0 +1,3460 @@ +/* + This software is available to you under a choice of one of two + licenses. 
You may choose to be licensed under the terms of the GNU + General Public License (GPL) Version 2, available at + , or the OpenIB.org BSD + license, available in the LICENSE.TXT file accompanying this + software. These details are also available at + . + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + + Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. +*/ + +/*** + *** This file was generated at "Tue Nov 22 15:21:23 2005" + *** by: + *** % csp_bf -copyright=/mswg/misc/license-header.txt -prefix arbelprm_ -bits -fixnames MT25218_PRM.csp + ***/ + +#ifndef H_prefix_arbelprm_bits_fixnames_MT25218_PRM_csp_H +#define H_prefix_arbelprm_bits_fixnames_MT25218_PRM_csp_H + +/* UD Address Vector */ + +struct arbelprm_ud_address_vector_st { /* Little Endian */ + pseudo_bit_t pd[0x00018]; /* Protection Domain */ + pseudo_bit_t port_number[0x00002]; /* Port number + 1 - Port 1 + 2 - Port 2 + other - reserved */ + pseudo_bit_t reserved0[0x00006]; +/* -------------- */ + pseudo_bit_t rlid[0x00010]; /* Remote (Destination) LID */ + pseudo_bit_t my_lid_path_bits[0x00007];/* Source LID - the lower 7 bits (upper bits are taken from PortInfo) */ + pseudo_bit_t g[0x00001]; /* Global address enable - if set, GRH will be formed for packet header */ + pseudo_bit_t reserved1[0x00008]; +/* -------------- */ + pseudo_bit_t hop_limit[0x00008]; /* IPv6 hop limit */ + pseudo_bit_t max_stat_rate[0x00003];/* Maximum static rate control. + 0 - 4X injection rate + 1 - 1X injection rate + other - reserved + */ + pseudo_bit_t reserved2[0x00001]; + pseudo_bit_t msg[0x00002]; /* Max Message size, size is 256*2^MSG bytes */ + pseudo_bit_t reserved3[0x00002]; + pseudo_bit_t mgid_index[0x00006]; /* Index to port GID table + mgid_index = (port_number-1) * 2^log_max_gid + gid_index + Where: + 1. log_max_gid is taken from QUERY_DEV_LIM command + 2. gid_index is the index to the GID table */ + pseudo_bit_t reserved4[0x0000a]; +/* -------------- */ + pseudo_bit_t flow_label[0x00014]; /* IPv6 flow label */ + pseudo_bit_t tclass[0x00008]; /* IPv6 TClass */ + pseudo_bit_t sl[0x00004]; /* InfiniBand Service Level (SL) */ +/* -------------- */ + pseudo_bit_t rgid_127_96[0x00020]; /* Remote GID[127:96] */ +/* -------------- */ + pseudo_bit_t rgid_95_64[0x00020]; /* Remote GID[95:64] */ +/* -------------- */ + pseudo_bit_t rgid_63_32[0x00020]; /* Remote GID[63:32] */ +/* -------------- */ + pseudo_bit_t rgid_31_0[0x00020]; /* Remote GID[31:0] if G bit is set. Must be set to 0x2 if G bit is cleared. */ +/* -------------- */ +}; + +/* Send doorbell */ + +struct arbelprm_send_doorbell_st { /* Little Endian */ + pseudo_bit_t nopcode[0x00005]; /* Opcode of descriptor to be executed */ + pseudo_bit_t f[0x00001]; /* Fence bit. If set, descriptor is fenced */ + pseudo_bit_t reserved0[0x00002]; + pseudo_bit_t wqe_counter[0x00010]; /* Modulo-64K counter of WQEs posted to the QP since its creation excluding the newly posted WQEs in this doorbell. Should be zero for the first doorbell on the QP */ + pseudo_bit_t wqe_cnt[0x00008]; /* Number of WQEs posted with this doorbell. Must be grater then zero. 
*/ +/* -------------- */ + pseudo_bit_t nds[0x00006]; /* Next descriptor size (in 16-byte chunks) */ + pseudo_bit_t reserved1[0x00002]; + pseudo_bit_t qpn[0x00018]; /* QP number this doorbell is rung on */ +/* -------------- */ +}; + +/* ACCESS_LAM_inject_errors_input_modifier */ + +struct arbelprm_access_lam_inject_errors_input_modifier_st { /* Little Endian */ + pseudo_bit_t index3[0x00007]; + pseudo_bit_t q3[0x00001]; + pseudo_bit_t index2[0x00007]; + pseudo_bit_t q2[0x00001]; + pseudo_bit_t index1[0x00007]; + pseudo_bit_t q1[0x00001]; + pseudo_bit_t index0[0x00007]; + pseudo_bit_t q0[0x00001]; +/* -------------- */ +}; + +/* ACCESS_LAM_inject_errors_input_parameter */ + +struct arbelprm_access_lam_inject_errors_input_parameter_st { /* Little Endian */ + pseudo_bit_t ba[0x00002]; /* Bank Address */ + pseudo_bit_t da[0x00002]; /* Dimm Address */ + pseudo_bit_t reserved0[0x0001c]; +/* -------------- */ + pseudo_bit_t ra[0x00010]; /* Row Address */ + pseudo_bit_t ca[0x00010]; /* Column Address */ +/* -------------- */ +}; + +/* */ + +struct arbelprm_recv_wqe_segment_next_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00006]; + pseudo_bit_t nda_31_6[0x0001a]; /* Next WQE address, low 32 bit. WQE address must be aligned to 64-byte boundary (6 LSB are forced ZERO). */ +/* -------------- */ + pseudo_bit_t nds[0x00006]; /* Next WQE size in OctoWords (16 bytes). + Zero value in NDS field signals end of WQEs? chain. + */ + pseudo_bit_t reserved1[0x0001a]; +/* -------------- */ +}; + +/* Send wqe segment data inline */ + +struct arbelprm_wqe_segment_data_inline_st { /* Little Endian */ + pseudo_bit_t byte_count[0x0000a]; /* Not including padding for 16Byte chunks */ + pseudo_bit_t reserved0[0x00015]; + pseudo_bit_t always1[0x00001]; +/* -------------- */ + pseudo_bit_t data[0x00018]; /* Data may be more this segment size - in 16Byte chunks */ + pseudo_bit_t reserved1[0x00008]; +/* -------------- */ + pseudo_bit_t reserved2[0x00040]; +/* -------------- */ +}; + +/* Send wqe segment data ptr */ + +struct arbelprm_wqe_segment_data_ptr_st { /* Little Endian */ + pseudo_bit_t byte_count[0x0001f]; + pseudo_bit_t always0[0x00001]; +/* -------------- */ + pseudo_bit_t l_key[0x00020]; +/* -------------- */ + pseudo_bit_t local_address_h[0x00020]; +/* -------------- */ + pseudo_bit_t local_address_l[0x00020]; +/* -------------- */ +}; + +/* Send wqe segment rd */ + +struct arbelprm_local_invalidate_segment_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00040]; +/* -------------- */ + pseudo_bit_t mem_key[0x00018]; + pseudo_bit_t reserved1[0x00008]; +/* -------------- */ + pseudo_bit_t reserved2[0x000a0]; +/* -------------- */ +}; + +/* Fast_Registration_Segment */ + +struct arbelprm_fast_registration_segment_st { /* Little Endian */ + pseudo_bit_t reserved0[0x0001b]; + pseudo_bit_t lr[0x00001]; /* If set - Local Read access will be enabled */ + pseudo_bit_t lw[0x00001]; /* If set - Local Write access will be enabled */ + pseudo_bit_t rr[0x00001]; /* If set - Remote Read access will be enabled */ + pseudo_bit_t rw[0x00001]; /* If set - Remote Write access will be enabled */ + pseudo_bit_t a[0x00001]; /* If set - Remote Atomic access will be enabled */ +/* -------------- */ + pseudo_bit_t pbl_ptr_63_32[0x00020];/* Physical address pointer [63:32] to the physical buffer list */ +/* -------------- */ + pseudo_bit_t mem_key[0x00020]; /* Memory Key on which the fast registration is executed on. */ +/* -------------- */ + pseudo_bit_t page_size[0x00005]; /* Page size used for the region. 
Actual size is [4K]*2^Page_size bytes. + page_size should be less than 20. */ + pseudo_bit_t reserved1[0x00002]; + pseudo_bit_t zb[0x00001]; /* Zero Based Region */ + pseudo_bit_t pbl_ptr_31_8[0x00018]; /* Physical address pointer [31:8] to the physical buffer list */ +/* -------------- */ + pseudo_bit_t start_address_h[0x00020];/* Start Address[63:32] - Virtual Address where this region starts */ +/* -------------- */ + pseudo_bit_t start_address_l[0x00020];/* Start Address[31:0] - Virtual Address where this region starts */ +/* -------------- */ + pseudo_bit_t reg_len_h[0x00020]; /* Region Length[63:32] */ +/* -------------- */ + pseudo_bit_t reg_len_l[0x00020]; /* Region Length[31:0] */ +/* -------------- */ +}; + +/* Send wqe segment atomic */ + +struct arbelprm_wqe_segment_atomic_st { /* Little Endian */ + pseudo_bit_t swap_add_h[0x00020]; +/* -------------- */ + pseudo_bit_t swap_add_l[0x00020]; +/* -------------- */ + pseudo_bit_t compare_h[0x00020]; +/* -------------- */ + pseudo_bit_t compare_l[0x00020]; +/* -------------- */ +}; + +/* Send wqe segment remote address */ + +struct arbelprm_wqe_segment_remote_address_st { /* Little Endian */ + pseudo_bit_t remote_virt_addr_h[0x00020]; +/* -------------- */ + pseudo_bit_t remote_virt_addr_l[0x00020]; +/* -------------- */ + pseudo_bit_t rkey[0x00020]; +/* -------------- */ + pseudo_bit_t reserved0[0x00020]; +/* -------------- */ +}; + +/* end wqe segment bind */ + +struct arbelprm_wqe_segment_bind_st { /* Little Endian */ + pseudo_bit_t reserved0[0x0001d]; + pseudo_bit_t rr[0x00001]; /* If set, Remote Read Enable for bound window. */ + pseudo_bit_t rw[0x00001]; /* If set, Remote Write Enable for bound window. + */ + pseudo_bit_t a[0x00001]; /* If set, Atomic Enable for bound window. */ +/* -------------- */ + pseudo_bit_t reserved1[0x0001e]; + pseudo_bit_t zb[0x00001]; /* If set, Window is Zero Based. */ + pseudo_bit_t type[0x00001]; /* Window type. + 0 - Type one window + 1 - Type two window + */ +/* -------------- */ + pseudo_bit_t new_rkey[0x00020]; /* The new RKey of window to bind */ +/* -------------- */ + pseudo_bit_t region_lkey[0x00020]; /* Local key of region, which window will be bound to */ +/* -------------- */ + pseudo_bit_t start_address_h[0x00020]; +/* -------------- */ + pseudo_bit_t start_address_l[0x00020]; +/* -------------- */ + pseudo_bit_t length_h[0x00020]; +/* -------------- */ + pseudo_bit_t length_l[0x00020]; +/* -------------- */ +}; + +/* Send wqe segment ud */ + +struct arbelprm_wqe_segment_ud_st { /* Little Endian */ + struct arbelprm_ud_address_vector_st ud_address_vector;/* UD Address Vector */ +/* -------------- */ + pseudo_bit_t destination_qp[0x00018]; + pseudo_bit_t reserved0[0x00008]; +/* -------------- */ + pseudo_bit_t q_key[0x00020]; +/* -------------- */ + pseudo_bit_t reserved1[0x00040]; +/* -------------- */ +}; + +/* Send wqe segment rd */ + +struct arbelprm_wqe_segment_rd_st { /* Little Endian */ + pseudo_bit_t destination_qp[0x00018]; + pseudo_bit_t reserved0[0x00008]; +/* -------------- */ + pseudo_bit_t q_key[0x00020]; +/* -------------- */ + pseudo_bit_t reserved1[0x00040]; +/* -------------- */ +}; + +/* Send wqe segment ctrl */ + +struct arbelprm_wqe_segment_ctrl_send_st { /* Little Endian */ + pseudo_bit_t always1[0x00001]; + pseudo_bit_t s[0x00001]; /* Solicited Event bit. If set, SE (Solicited Event) bit is set in the (last packet of) message. */ + pseudo_bit_t e[0x00001]; /* Event bit. If set, event is generated upon WQE?s completion, if QP is allowed to generate an event. 
Every WQE with E-bit set generates an event. The C bit must be set on unsignalled QPs if the E bit is set. */ + pseudo_bit_t c[0x00001]; /* Completion Queue bit. Valid for unsignalled QPs only. If set, the CQ is updated upon WQE?s completion */ + pseudo_bit_t ip[0x00001]; /* When set, InfiniHost III Ex will calculate the IP checksum of the IP header that is present immediately after the IPoverIB encapsulation header. In the case of multiple headers (encapsulation), InfiniHost III Ex will calculate the checksum only for the first IP header following the IPoverIB encapsulation header. Not Valid for IPv6 packets */ + pseudo_bit_t tcp_udp[0x00001]; /* When set, InfiniHost III Ex will calculate the TCP/UDP checksum of the packet that is present immediately after the IP header. In the case of multiple headers (encapsulation), InfiniHost III Ex will calculate the checksum only for the first TCP header following the IP header. This bit may be set only if the entire TCP/UDP segment is present in one IB packet */ + pseudo_bit_t reserved0[0x00001]; + pseudo_bit_t so[0x00001]; /* Strong Ordering - when set, the WQE will be executed only after all previous WQEs have been executed. Can be set for RC WQEs only. This bit must be set in type two BIND, Fast Registration and Local invalidate operations. */ + pseudo_bit_t reserved1[0x00018]; +/* -------------- */ + pseudo_bit_t immediate[0x00020]; /* If the OpCode encodes an operation with Immediate (RDMA-write/SEND), This field will hold the Immediate data to be sent. If the OpCode encodes send and invalidate operations, this field holds the Invalidation key to be inserted into the packet; otherwise, this field is reserved. */ +/* -------------- */ +}; + +/* Send wqe segment next */ + +struct arbelprm_wqe_segment_next_st { /* Little Endian */ + pseudo_bit_t nopcode[0x00005]; /* Next Opcode: OpCode to be used in the next WQE. Encodes the type of operation to be executed on the QP: + ?00000? - NOP. WQE with this opcode creates a completion, but does nothing else + ?01000? - RDMA-write + ?01001? - RDMA-Write with Immediate + ?10000? - RDMA-read + ?10001? - Atomic Compare & swap + ?10010? - Atomic Fetch & Add + ?11000? - Bind memory window + + The encoding for the following operations depends on the QP type: + For RC, UC and RD QP: + ?01010? - SEND + ?01011? - SEND with Immediate + + For UD QP: + the encoding depends on the values of bit[31] of the Q_key field in the Datagram Segment (see Table 39, ?Unreliable Datagram Segment Format - Pointers,? on page 101) of + both the current WQE and the next WQE, as follows: + + If the last WQE Q_Key bit[31] is clear and the next WQE Q_key bit[31] is set : + ?01000? - SEND + ?01001? - SEND with Immediate + + otherwise (if the next WQE Q_key bit[31] is cleared, or the last WQE Q_Key bit[31] is set): + ?01010? - SEND + ?01011? - SEND with Immediate + + All other opcode values are RESERVED, and will result in invalid operation execution. */ + pseudo_bit_t reserved0[0x00001]; + pseudo_bit_t nda_31_6[0x0001a]; /* Next WQE address, low 32 bit. WQE address must be aligned to 64-byte boundary (6 LSB are forced ZERO). */ +/* -------------- */ + pseudo_bit_t nds[0x00006]; /* Next WQE size in OctoWords (16 bytes). + Zero value in NDS field signals end of WQEs? chain. + */ + pseudo_bit_t f[0x00001]; /* Fence bit. If set, next WQE will start execution only after all previous Read/Atomic WQEs complete. 
*/ + pseudo_bit_t always1[0x00001]; + pseudo_bit_t reserved1[0x00018]; +/* -------------- */ +}; + +/* Address Path */ + +struct arbelprm_address_path_st { /* Little Endian */ + pseudo_bit_t pkey_index[0x00007]; /* PKey table index */ + pseudo_bit_t reserved0[0x00011]; + pseudo_bit_t port_number[0x00002]; /* Specific port associated with this QP/EE. + 1 - Port 1 + 2 - Port 2 + other - reserved */ + pseudo_bit_t reserved1[0x00006]; +/* -------------- */ + pseudo_bit_t rlid[0x00010]; /* Remote (Destination) LID */ + pseudo_bit_t my_lid_path_bits[0x00007];/* Source LID - the lower 7 bits (upper bits are taken from PortInfo) */ + pseudo_bit_t g[0x00001]; /* Global address enable - if set, GRH will be formed for packet header */ + pseudo_bit_t reserved2[0x00005]; + pseudo_bit_t rnr_retry[0x00003]; /* RNR retry count (see C9-132 in IB spec Vol 1) + 0-6 - number of retries + 7 - infinite */ +/* -------------- */ + pseudo_bit_t hop_limit[0x00008]; /* IPv6 hop limit */ + pseudo_bit_t max_stat_rate[0x00003];/* Maximum static rate control. + 0 - 100% injection rate + 1 - 25% injection rate + 2 - 12.5% injection rate + 3 - 50% injection rate + other - reserved */ + pseudo_bit_t reserved3[0x00005]; + pseudo_bit_t mgid_index[0x00006]; /* Index to port GID table */ + pseudo_bit_t reserved4[0x00005]; + pseudo_bit_t ack_timeout[0x00005]; /* Local ACK timeout - Transport timer for activation of retransmission mechanism. Refer to IB spec Vol1 9.7.6.1.3 for further details. + The transport timer is set to 4.096us*2^ack_timeout, if ack_timeout is 0 then transport timer is disabled. */ +/* -------------- */ + pseudo_bit_t flow_label[0x00014]; /* IPv6 flow label */ + pseudo_bit_t tclass[0x00008]; /* IPv6 TClass */ + pseudo_bit_t sl[0x00004]; /* InfiniBand Service Level (SL) */ +/* -------------- */ + pseudo_bit_t rgid_127_96[0x00020]; /* Remote GID[127:96] */ +/* -------------- */ + pseudo_bit_t rgid_95_64[0x00020]; /* Remote GID[95:64] */ +/* -------------- */ + pseudo_bit_t rgid_63_32[0x00020]; /* Remote GID[63:32] */ +/* -------------- */ + pseudo_bit_t rgid_31_0[0x00020]; /* Remote GID[31:0] */ +/* -------------- */ +}; + +/* HCA Command Register (HCR) */ + +struct arbelprm_hca_command_register_st { /* Little Endian */ + pseudo_bit_t in_param_h[0x00020]; /* Input Parameter: parameter[63:32] or pointer[63:32] to input mailbox (see command description) */ +/* -------------- */ + pseudo_bit_t in_param_l[0x00020]; /* Input Parameter: parameter[31:0] or pointer[31:0] to input mailbox (see command description) */ +/* -------------- */ + pseudo_bit_t input_modifier[0x00020];/* Input Parameter Modifier */ +/* -------------- */ + pseudo_bit_t out_param_h[0x00020]; /* Output Parameter: parameter[63:32] or pointer[63:32] to output mailbox (see command description) */ +/* -------------- */ + pseudo_bit_t out_param_l[0x00020]; /* Output Parameter: parameter[31:0] or pointer[31:0] to output mailbox (see command description) */ +/* -------------- */ + pseudo_bit_t reserved0[0x00010]; + pseudo_bit_t token[0x00010]; /* Software assigned token to the command, to uniquely identify it. The token is returned to the software in the EQE reported. */ +/* -------------- */ + pseudo_bit_t opcode[0x0000c]; /* Command opcode */ + pseudo_bit_t opcode_modifier[0x00004];/* Opcode Modifier, see specific description for each command. 
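As a non-authoritative illustration of how software might drive this register, based on the go and status fields described just below (hcr_readl() and the dword index are placeholders, not names from this header): software polls the seventh dword of the HCR until the go bit (bit 23) is clear, and only then reads the status byte (bits 31:24) or posts a new command.

    // Placeholder sketch: hcr_readl(i) is assumed to return the i-th 32-bit
    // word of the HCR in CPU byte order.
    static inline int hcr_poll_status(void)
    {
            __u32 dw;
            do {
                    dw = hcr_readl(6);
            } while (dw & (1 << 23));       // go still set: HW owns the HCR
            return (dw >> 24) & 0xff;       // status of the last command
    }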
*/ + pseudo_bit_t reserved1[0x00006]; + pseudo_bit_t e[0x00001]; /* Event Request + 0 - Don't report event (software will poll the GO bit) + 1 - Report event to EQ when the command completes */ + pseudo_bit_t go[0x00001]; /* Go (0=Software ownership for the HCR, 1=Hardware ownership for the HCR) + Software can write to the HCR only if Go bit is cleared. + Software must set the Go bit to trigger the HW to execute the command. Software must not write to this register value other than 1 for the Go bit. */ + pseudo_bit_t status[0x00008]; /* Command execution status report. Valid only if command interface in under SW ownership (Go bit is cleared) + 0 - command completed without error. If different than zero, command execution completed with error. Syndrom encoding is depended on command executed and is defined for each command */ +/* -------------- */ +}; + +/* CQ Doorbell */ + +struct arbelprm_cq_cmd_doorbell_st { /* Little Endian */ + pseudo_bit_t cqn[0x00018]; /* CQ number accessed */ + pseudo_bit_t cmd[0x00003]; /* Command to be executed on CQ + 0x0 - Reserved + 0x1 - Request notification for next Solicited completion event. CQ_param specifies the current CQ Consumer Counter. + 0x2 - Request notification for next Solicited or Unsolicited completion event. CQ_param specifies the current CQ Consumer Counter. + 0x3 - Request notification for multiple completions (Arm-N). CQ_param specifies the value of the CQ Counter that when reached by HW (i.e. HW generates a CQE into this Counter) Event will be generated + Other - Reserved */ + pseudo_bit_t reserved0[0x00001]; + pseudo_bit_t cmd_sn[0x00002]; /* Command Sequence Number - This field should be incremented upon receiving completion notification of the respective CQ. + This transition is done by ringing Request notification for next Solicited, Request notification for next Solicited or Unsolicited + completion or Request notification for multiple completions doorbells after receiving completion notification. + This field is initialized to Zero */ + pseudo_bit_t reserved1[0x00002]; +/* -------------- */ + pseudo_bit_t cq_param[0x00020]; /* parameter to be used by CQ command */ +/* -------------- */ +}; + +/* RD-send doorbell */ + +struct arbelprm_rd_send_doorbell_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00008]; + pseudo_bit_t een[0x00018]; /* End-to-end context number (reliable datagram) + Must be zero for Nop and Bind operations */ +/* -------------- */ + pseudo_bit_t reserved1[0x00008]; + pseudo_bit_t qpn[0x00018]; /* QP number this doorbell is rung on */ +/* -------------- */ + struct arbelprm_send_doorbell_st send_doorbell;/* Send Parameters */ +/* -------------- */ +}; + +/* Multicast Group Member QP */ + +struct arbelprm_mgmqp_st { /* Little Endian */ + pseudo_bit_t qpn_i[0x00018]; /* QPN_i: QP number which is a member in this multicast group. Valid only if Qi bit is set. 
Length of the QPN_i list is set in INIT_HCA */ + pseudo_bit_t reserved0[0x00007]; + pseudo_bit_t qi[0x00001]; /* Qi: QPN_i is valid */ +/* -------------- */ +}; + +/* vsd */ + +struct arbelprm_vsd_st { /* Little Endian */ + pseudo_bit_t vsd_dw0[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw1[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw2[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw3[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw4[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw5[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw6[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw7[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw8[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw9[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw10[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw11[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw12[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw13[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw14[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw15[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw16[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw17[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw18[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw19[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw20[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw21[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw22[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw23[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw24[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw25[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw26[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw27[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw28[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw29[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw30[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw31[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw32[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw33[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw34[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw35[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw36[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw37[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw38[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw39[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw40[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw41[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw42[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw43[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw44[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw45[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw46[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw47[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw48[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw49[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw50[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw51[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw52[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw53[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw54[0x00020]; +/* -------------- */ + pseudo_bit_t vsd_dw55[0x00020]; +/* -------------- */ +}; + +/* ACCESS_LAM_inject_errors */ + +struct arbelprm_access_lam_inject_errors_st { /* Little Endian */ + struct arbelprm_access_lam_inject_errors_input_parameter_st 
access_lam_inject_errors_input_parameter; +/* -------------- */ + struct arbelprm_access_lam_inject_errors_input_modifier_st access_lam_inject_errors_input_modifier; +/* -------------- */ + pseudo_bit_t reserved0[0x00020]; +/* -------------- */ +}; + +/* Logical DIMM Information */ + +struct arbelprm_dimminfo_st { /* Little Endian */ + pseudo_bit_t dimmsize[0x00010]; /* Size of DIMM in units of 2^20 Bytes. This value is valid only when DIMMStatus is 0. */ + pseudo_bit_t reserved0[0x00008]; + pseudo_bit_t dimmstatus[0x00001]; /* DIMM Status + 0 - Enabled + 1 - Disabled + */ + pseudo_bit_t dh[0x00001]; /* When set, the DIMM is Hidden and can not be accessed from the PCI bus. */ + pseudo_bit_t wo[0x00001]; /* When set, the DIMM is write only. + If data integrity is configured (other than none), the DIMM must be + only targeted by write transactions where the address and size are multiples of 16 bytes. */ + pseudo_bit_t reserved1[0x00005]; +/* -------------- */ + pseudo_bit_t spd[0x00001]; /* 0 - DIMM SPD was read from DIMM + 1 - DIMM SPD was read from InfiniHost-III-EX NVMEM */ + pseudo_bit_t sladr[0x00003]; /* SPD Slave Address 3 LSBits. + Valid only if spd bit is 0. */ + pseudo_bit_t sock_num[0x00002]; /* DIMM socket number (for double sided DIMM one of the two numbers will be reported) */ + pseudo_bit_t syn[0x00004]; /* Error syndrome (valid regardless of status value) + 0 - DIMM has no error + 1 - SPD error (e.g. checksum error, no response, error while reading) + 2 - DIMM out of bounds (e.g. DIMM rows number is not between 7 and 14, DIMM type is not 2) + 3 - DIMM conflict (e.g. mix of registered and unbuffered DIMMs, CAS latency conflict) + 5 - DIMM size trimmed due to configuration (size exceeds) + other - Error, reserved + */ + pseudo_bit_t reserved2[0x00016]; +/* -------------- */ + pseudo_bit_t reserved3[0x00040]; +/* -------------- */ + pseudo_bit_t dimm_start_adr_h[0x00020];/* DIMM memory start address [63:32]. This value is valid only when DIMMStatus is 0. */ +/* -------------- */ + pseudo_bit_t dimm_start_adr_l[0x00020];/* DIMM memory start address [31:0]. This value is valid only when DIMMStatus is 0. */ +/* -------------- */ + pseudo_bit_t reserved4[0x00040]; +/* -------------- */ +}; + +/* UAR Parameters */ + +struct arbelprm_uar_params_st { /* Little Endian */ + pseudo_bit_t uar_base_addr_h[0x00020];/* UAR Base (pyhsical) Address [63:32] (QUERY_HCA only) */ +/* -------------- */ + pseudo_bit_t reserved0[0x00014]; + pseudo_bit_t uar_base_addr_l[0x0000c];/* UAR Base (pyhsical) Address [31:20] (QUERY_HCA only) */ +/* -------------- */ + pseudo_bit_t uar_page_sz[0x00008]; /* This field defines the size of each UAR page. + Size of UAR Page is 4KB*2^UAR_Page_Size */ + pseudo_bit_t log_max_uars[0x00004]; /* Number of UARs supported is 2^log_max_UARs */ + pseudo_bit_t reserved1[0x00004]; + pseudo_bit_t log_uar_entry_sz[0x00006];/* Size of UAR Context entry is 2^log_uar_sz in 4KByte pages */ + pseudo_bit_t reserved2[0x0000a]; +/* -------------- */ + pseudo_bit_t reserved3[0x00020]; +/* -------------- */ + pseudo_bit_t uar_scratch_base_addr_h[0x00020];/* Base address of UAR scratchpad [63:32]. + Number of entries in table is 2^log_max_uars. + Table must be aligned to its size */ +/* -------------- */ + pseudo_bit_t uar_scratch_base_addr_l[0x00020];/* Base address of UAR scratchpad [31:0]. + Number of entries in table is 2^log_max_uars. + Table must be aligned to its size. */ +/* -------------- */ + pseudo_bit_t uar_context_base_addr_h[0x00020];/* Base address of UAR Context [63:32]. 
+ Number of entries in table is 2^log_max_uars. + Table must be aligned to its size. */ +/* -------------- */ + pseudo_bit_t uar_context_base_addr_l[0x00020];/* Base address of UAR Context [31:0]. + Number of entries in table is 2^log_max_uars. + Table must be aligned to its size. */ +/* -------------- */ +}; + +/* Translation and Protection Tables Parameters */ + +struct arbelprm_tptparams_st { /* Little Endian */ + pseudo_bit_t mpt_base_adr_h[0x00020];/* MPT - Memory Protection Table base physical address [63:32]. + Entry size is 64 bytes. + Table must be aligned to its size. + Address may be set to 0xFFFFFFFF if address translation and protection is not supported. */ +/* -------------- */ + pseudo_bit_t mpt_base_adr_l[0x00020];/* MPT - Memory Protection Table base physical address [31:0]. + Entry size is 64 bytes. + Table must be aligned to its size. + Address may be set to 0xFFFFFFFF if address translation and protection is not supported. */ +/* -------------- */ + pseudo_bit_t log_mpt_sz[0x00006]; /* Log (base 2) of the number of region/windows entries in the MPT table. */ + pseudo_bit_t reserved0[0x00002]; + pseudo_bit_t pfto[0x00005]; /* Page Fault RNR Timeout - + The field returned in RNR Naks generated when a page fault is detected. + It has no effect when on-demand-paging is not used. */ + pseudo_bit_t reserved1[0x00013]; +/* -------------- */ + pseudo_bit_t reserved2[0x00020]; +/* -------------- */ + pseudo_bit_t mtt_base_addr_h[0x00020];/* MTT - Memory Translation table base physical address [63:32]. + Table must be aligned to its size. + Address may be set to 0xFFFFFFFF if address translation and protection is not supported. */ +/* -------------- */ + pseudo_bit_t mtt_base_addr_l[0x00020];/* MTT - Memory Translation table base physical address [31:0]. + Table must be aligned to its size. + Address may be set to 0xFFFFFFFF if address translation and protection is not supported. */ +/* -------------- */ + pseudo_bit_t reserved3[0x00040]; +/* -------------- */ +}; + +/* Multicast Support Parameters */ + +struct arbelprm_multicastparam_st { /* Little Endian */ + pseudo_bit_t mc_base_addr_h[0x00020];/* Base Address of the Multicast Table [63:32]. + The base address must be aligned to the entry size. + Address may be set to 0xFFFFFFFF if multicast is not supported. */ +/* -------------- */ + pseudo_bit_t mc_base_addr_l[0x00020];/* Base Address of the Multicast Table [31:0]. + The base address must be aligned to the entry size. + Address may be set to 0xFFFFFFFF if multicast is not supported. */ +/* -------------- */ + pseudo_bit_t reserved0[0x00040]; +/* -------------- */ + pseudo_bit_t log_mc_table_entry_sz[0x00010];/* Log2 of the Size of multicast group member (MGM) entry. + Must be greater than 5 (to allow CTRL and GID sections). + That implies the number of QPs per MC table entry. 
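As a worked example (a sketch, not from the PRM text: the 32-byte size of the CTRL and GID sections is an assumption, while the 4-byte QPN_i slot follows the arbelprm_mgmqp_st layout above), the number of QPN slots per MGM entry would be ((1 << log_mc_table_entry_sz) - 32) / 4; a 64-byte entry (log value 6) would therefore hold 8 QPN_i slots.

    // Hypothetical helper; MGM_CTRL_GID_BYTES = 32 is an assumed header size.
    #define MGM_CTRL_GID_BYTES        32
    #define MGM_QPS_PER_ENTRY(log_sz) (((1U << (log_sz)) - MGM_CTRL_GID_BYTES) / 4)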
*/ + pseudo_bit_t reserved1[0x00010]; +/* -------------- */ + pseudo_bit_t mc_table_hash_sz[0x00011];/* Number of entries in multicast DGID hash table (must be power of 2) + INIT_HCA - the required number of entries + QUERY_HCA - the actual number of entries assigned by firmware (will be less than or equal to the amount required in INIT_HCA) */ + pseudo_bit_t reserved2[0x0000f]; +/* -------------- */ + pseudo_bit_t log_mc_table_sz[0x00005];/* Log2 of the overall number of MC entries in the MCG table (includes both hash and auxiliary tables) */ + pseudo_bit_t reserved3[0x00013]; + pseudo_bit_t mc_hash_fn[0x00003]; /* Multicast hash function + 0 - Default hash function + other - reserved */ + pseudo_bit_t reserved4[0x00005]; +/* -------------- */ + pseudo_bit_t reserved5[0x00020]; +/* -------------- */ +}; + +/* QPC/EEC/CQC/EQC/RDB Parameters */ + +struct arbelprm_qpcbaseaddr_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00080]; +/* -------------- */ + pseudo_bit_t qpc_base_addr_h[0x00020];/* QPC Base Address [63:32] + Table must be aligned on its size */ +/* -------------- */ + pseudo_bit_t log_num_of_qp[0x00005];/* Log base 2 of number of supported QPs */ + pseudo_bit_t reserved1[0x00002]; + pseudo_bit_t qpc_base_addr_l[0x00019];/* QPC Base Address [31:7] + Table must be aligned on its size */ +/* -------------- */ + pseudo_bit_t reserved2[0x00040]; +/* -------------- */ + pseudo_bit_t eec_base_addr_h[0x00020];/* EEC Base Address [63:32] + Table must be aligned on its size. + Address may be set to 0xFFFFFFFF if RD is not supported. */ +/* -------------- */ + pseudo_bit_t log_num_of_ee[0x00005];/* Log base 2 of number of supported EEs. */ + pseudo_bit_t reserved3[0x00002]; + pseudo_bit_t eec_base_addr_l[0x00019];/* EEC Base Address [31:7] + Table must be aligned on its size + Address may be set to 0xFFFFFFFF if RD is not supported. */ +/* -------------- */ + pseudo_bit_t srqc_base_addr_h[0x00020];/* SRQ Context Base Address [63:32] + Table must be aligned on its size + Address may be set to 0xFFFFFFFF if SRQ is not supported. */ +/* -------------- */ + pseudo_bit_t log_num_of_srq[0x00005];/* Log base 2 of number of supported SRQs. */ + pseudo_bit_t srqc_base_addr_l[0x0001b];/* SRQ Context Base Address [31:5] + Table must be aligned on its size + Address may be set to 0xFFFFFFFF if SRQ is not supported. */ +/* -------------- */ + pseudo_bit_t cqc_base_addr_h[0x00020];/* CQC Base Address [63:32] + Table must be aligned on its size */ +/* -------------- */ + pseudo_bit_t log_num_of_cq[0x00005];/* Log base 2 of number of supported CQs. */ + pseudo_bit_t reserved4[0x00001]; + pseudo_bit_t cqc_base_addr_l[0x0001a];/* CQC Base Address [31:6] + Table must be aligned on its size */ +/* -------------- */ + pseudo_bit_t reserved5[0x00040]; +/* -------------- */ + pseudo_bit_t eqpc_base_addr_h[0x00020];/* Extended QPC Base Address [63:32] + Table has same number of entries as QPC table. + Table must be aligned to entry size. */ +/* -------------- */ + pseudo_bit_t eqpc_base_addr_l[0x00020];/* Extended QPC Base Address [31:0] + Table has same number of entries as QPC table. + Table must be aligned to entry size. */ +/* -------------- */ + pseudo_bit_t reserved6[0x00040]; +/* -------------- */ + pseudo_bit_t eeec_base_addr_h[0x00020];/* Extended EEC Base Address [63:32] + Table has same number of entries as EEC table. + Table must be aligned to entry size. + Address may be set to 0xFFFFFFFF if RD is not supported. 
*/ +/* -------------- */ + pseudo_bit_t eeec_base_addr_l[0x00020];/* Extended EEC Base Address [31:0] + Table has same number of entries as EEC table. + Table must be aligned to entry size. + Address may be set to 0xFFFFFFFF if RD is not supported. */ +/* -------------- */ + pseudo_bit_t reserved7[0x00040]; +/* -------------- */ + pseudo_bit_t eqc_base_addr_h[0x00020];/* EQC Base Address [63:32] + Address may be set to 0xFFFFFFFF if EQs are not supported. + Table must be aligned to entry size. */ +/* -------------- */ + pseudo_bit_t log_num_eq[0x00004]; /* Log base 2 of number of supported EQs. + Must be 6 or less in InfiniHost-III-EX. */ + pseudo_bit_t reserved8[0x00002]; + pseudo_bit_t eqc_base_addr_l[0x0001a];/* EQC Base Address [31:6] + Address may be set to 0xFFFFFFFF if EQs are not supported. + Table must be aligned to entry size. */ +/* -------------- */ + pseudo_bit_t reserved9[0x00040]; +/* -------------- */ + pseudo_bit_t rdb_base_addr_h[0x00020];/* Base address of table that holds remote read and remote atomic requests [63:32]. + Address may be set to 0xFFFFFFFF if remote RDMA reads are not supported. + Please refer to QP and EE chapter for further explanation on RDB allocation. */ +/* -------------- */ + pseudo_bit_t rdb_base_addr_l[0x00020];/* Base address of table that holds remote read and remote atomic requests [31:0]. + Table must be aligned to RDB entry size (32 bytes). + Address may be set to zero if remote RDMA reads are not supported. + Please refer to QP and EE chapter for further explanation on RDB allocation. */ +/* -------------- */ + pseudo_bit_t reserved10[0x00040]; +/* -------------- */ +}; + +/* Header_Log_Register */ + +struct arbelprm_header_log_register_st { /* Little Endian */ + pseudo_bit_t place_holder[0x00020]; +/* -------------- */ + pseudo_bit_t reserved0[0x00060]; +/* -------------- */ +}; + +/* Performance Monitors */ + +struct arbelprm_performance_monitors_st { /* Little Endian */ + pseudo_bit_t e0[0x00001]; /* Enables counting of respective performance counter */ + pseudo_bit_t e1[0x00001]; /* Enables counting of respective performance counter */ + pseudo_bit_t e2[0x00001]; /* Enables counting of respective performance counter */ + pseudo_bit_t reserved0[0x00001]; + pseudo_bit_t r0[0x00001]; /* If written to as '1 - resets respective performance counter, if written to az '0 - no change to matter */ + pseudo_bit_t r1[0x00001]; /* If written to as '1 - resets respective performance counter, if written to az '0 - no change to matter */ + pseudo_bit_t r2[0x00001]; /* If written to as '1 - resets respective performance counter, if written to az '0 - no change to matter */ + pseudo_bit_t reserved1[0x00001]; + pseudo_bit_t i0[0x00001]; /* Interrupt enable on respective counter overflow. '1 - interrupt enabled, '0 - interrupt disabled. */ + pseudo_bit_t i1[0x00001]; /* Interrupt enable on respective counter overflow. '1 - interrupt enabled, '0 - interrupt disabled. */ + pseudo_bit_t i2[0x00001]; /* Interrupt enable on respective counter overflow. '1 - interrupt enabled, '0 - interrupt disabled. */ + pseudo_bit_t reserved2[0x00001]; + pseudo_bit_t f0[0x00001]; /* Overflow flag. If set, overflow occurred on respective counter. Cleared if written to as '1 */ + pseudo_bit_t f1[0x00001]; /* Overflow flag. If set, overflow occurred on respective counter. Cleared if written to as '1 */ + pseudo_bit_t f2[0x00001]; /* Overflow flag. If set, overflow occurred on respective counter. 
Cleared if written to as '1 */ + pseudo_bit_t reserved3[0x00001]; + pseudo_bit_t ev_cnt1[0x00005]; /* Specifies event to be counted by Event_counter1 See XXX for events' definition. */ + pseudo_bit_t reserved4[0x00003]; + pseudo_bit_t ev_cnt2[0x00005]; /* Specifies event to be counted by Event_counter2 See XXX for events' definition. */ + pseudo_bit_t reserved5[0x00003]; +/* -------------- */ + pseudo_bit_t clock_counter[0x00020]; +/* -------------- */ + pseudo_bit_t event_counter1[0x00020]; +/* -------------- */ + pseudo_bit_t event_counter2[0x00020];/* Read/write event counter, counting events specified by EvCntl and EvCnt2 fields repsectively. When the event counter reaches is maximum value of 0xFFFFFF, the next event will cause it to roll over to zero, set F1 or F2 bit respectively and generate interrupt by I1 I2 bit respectively. */ +/* -------------- */ +}; + +/* Receive segment format */ + +struct arbelprm_wqe_segment_ctrl_recv_st { /* Little Endian */ + struct arbelprm_recv_wqe_segment_next_st wqe_segment_next; +/* -------------- */ + pseudo_bit_t reserved0[0x00002]; + pseudo_bit_t reserved1[0x00001]; + pseudo_bit_t reserved2[0x00001]; + pseudo_bit_t reserved3[0x0001c]; +/* -------------- */ + pseudo_bit_t reserved4[0x00020]; +/* -------------- */ +}; + +/* MLX WQE segment format */ + +struct arbelprm_wqe_segment_ctrl_mlx_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00002]; + pseudo_bit_t e[0x00001]; /* WQE event */ + pseudo_bit_t c[0x00001]; /* Create CQE (for "requested signalling" QP) */ + pseudo_bit_t icrc[0x00002]; /* icrc field detemines what to do with the last dword of the packet: 0 - Calculate ICRC and put it instead of last dword. Last dword must be 0x0. 1,2 - reserved. 3 - Leave last dword as is. Last dword must not be 0x0. */ + pseudo_bit_t reserved1[0x00002]; + pseudo_bit_t sl[0x00004]; + pseudo_bit_t max_statrate[0x00004]; + pseudo_bit_t slr[0x00001]; /* 0= take slid from port. 
1= take slid from given headers */ + pseudo_bit_t v15[0x00001]; /* Send packet over VL15 */ + pseudo_bit_t reserved2[0x0000e]; +/* -------------- */ + pseudo_bit_t vcrc[0x00010]; /* Packet's VCRC (if not 0 - otherwise computed by HW) */ + pseudo_bit_t rlid[0x00010]; /* Destination LID (must match given headers) */ +/* -------------- */ + pseudo_bit_t reserved3[0x00040]; +/* -------------- */ +}; + +/* Send WQE segment format */ + +struct arbelprm_send_wqe_segment_st { /* Little Endian */ + struct arbelprm_wqe_segment_next_st wqe_segment_next;/* Send wqe segment next */ +/* -------------- */ + struct arbelprm_wqe_segment_ctrl_send_st wqe_segment_ctrl_send;/* Send wqe segment ctrl */ +/* -------------- */ + struct arbelprm_wqe_segment_rd_st wqe_segment_rd;/* Send wqe segment rd */ +/* -------------- */ + struct arbelprm_wqe_segment_ud_st wqe_segment_ud;/* Send wqe segment ud */ +/* -------------- */ + struct arbelprm_wqe_segment_bind_st wqe_segment_bind;/* Send wqe segment bind */ +/* -------------- */ + pseudo_bit_t reserved0[0x00180]; +/* -------------- */ + struct arbelprm_wqe_segment_remote_address_st wqe_segment_remote_address;/* Send wqe segment remote address */ +/* -------------- */ + struct arbelprm_wqe_segment_atomic_st wqe_segment_atomic;/* Send wqe segment atomic */ +/* -------------- */ + struct arbelprm_fast_registration_segment_st fast_registration_segment;/* Fast Registration Segment */ +/* -------------- */ + struct arbelprm_local_invalidate_segment_st local_invalidate_segment;/* local invalidate segment */ +/* -------------- */ + struct arbelprm_wqe_segment_data_ptr_st wqe_segment_data_ptr;/* Send wqe segment data ptr */ +/* -------------- */ + struct arbelprm_wqe_segment_data_inline_st wqe_segment_data_inline;/* Send wqe segment data inline */ +/* -------------- */ + pseudo_bit_t reserved1[0x00200]; +/* -------------- */ +}; + +/* QP and EE Context Entry */ + +struct arbelprm_queue_pair_ee_context_entry_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00008]; + pseudo_bit_t de[0x00001]; /* Send/Receive Descriptor Event enable - if set, events can be generated upon descriptors' completion on send/receive queue (controlled by E bit in WQE). Invalid in EE context */ + pseudo_bit_t reserved1[0x00002]; + pseudo_bit_t pm_state[0x00002]; /* Path migration state (Migrated, Armed or Rearm) + 11-Migrated + 00-Armed + 01-Rearm + 10-Reserved + Should be set to 11 for UD QPs and for QPs which do not support APM */ + pseudo_bit_t reserved2[0x00003]; + pseudo_bit_t st[0x00003]; /* Service type (invalid in EE context): + 000-Reliable Connection + 001-Unreliable Connection + 010-Reliable Datagram + 011-Unreliable Datagram + 111-MLX transport (raw bits injection). Used for management QPs and RAW */ + pseudo_bit_t reserved3[0x00009]; + pseudo_bit_t state[0x00004]; /* QP/EE state: + 0 - RST + 1 - INIT + 2 - RTR + 3 - RTS + 4 - SQEr + 5 - SQD (Send Queue Drained) + 6 - ERR + 7 - Send Queue Draining + 8 - Reserved + 9 - Suspended + A- F - Reserved + (Valid for QUERY_QPEE and ERR2RST_QPEE commands only) */ +/* -------------- */ + pseudo_bit_t reserved4[0x00020]; +/* -------------- */ + pseudo_bit_t sched_queue[0x00004]; /* Schedule queue to be used for WQE scheduling to execution. Determines QOS for this QP. */ + pseudo_bit_t rlky[0x00001]; /* When set this QP can use the Reserved L_Key */ + pseudo_bit_t reserved5[0x00003]; + pseudo_bit_t log_sq_stride[0x00003];/* Stride on the send queue. WQ entry is 16*(2^log_SQ_stride) bytes. 
+ Stride must be equal or bigger then 64 bytes (minimum log_RQ_stride value allowed is 2). */ + pseudo_bit_t log_sq_size[0x00004]; /* Log2 of the Number of WQEs in the Send Queue. */ + pseudo_bit_t reserved6[0x00001]; + pseudo_bit_t log_rq_stride[0x00003];/* Stride on the receive queue. WQ entry is 16*(2^log_RQ_stride) bytes. + Stride must be equal or bigger then 64 bytes (minimum log_RQ_stride value allowed is 2). */ + pseudo_bit_t log_rq_size[0x00004]; /* Log2 of the Number of WQEs in the Receive Queue. */ + pseudo_bit_t reserved7[0x00001]; + pseudo_bit_t msg_max[0x00005]; /* Max message size allowed on the QP. Maximum message size is 2^msg_Max. + Must be equal to MTU for UD and MLX QPs. */ + pseudo_bit_t mtu[0x00003]; /* MTU of the QP (Must be the same for both paths: primary and alternative): + 0x1 - 256 bytes + 0x2 - 512 + 0x3 - 1024 + 0x4 - 2048 + other - reserved + + Should be configured to 0x4 for UD and MLX QPs. */ +/* -------------- */ + pseudo_bit_t usr_page[0x00018]; /* QP (see "non_privileged Access to the HCA Hardware"). Not valid (reserved) in EE context. */ + pseudo_bit_t reserved8[0x00008]; +/* -------------- */ + pseudo_bit_t local_qpn_een[0x00018];/* Local QP/EE number Lower bits determine position of this record in QPC table, and - thus - constrained + This field is valid for QUERY and ERR2RST commands only. */ + pseudo_bit_t reserved9[0x00008]; +/* -------------- */ + pseudo_bit_t remote_qpn_een[0x00018];/* Remote QP/EE number */ + pseudo_bit_t reserved10[0x00008]; +/* -------------- */ + pseudo_bit_t reserved11[0x00040]; +/* -------------- */ + struct arbelprm_address_path_st primary_address_path;/* Primary address path for the QP/EE */ +/* -------------- */ + struct arbelprm_address_path_st alternative_address_path;/* Alternate address path for the QP/EE */ +/* -------------- */ + pseudo_bit_t rdd[0x00018]; /* Reliable Datagram Domain */ + pseudo_bit_t reserved12[0x00008]; +/* -------------- */ + pseudo_bit_t pd[0x00018]; /* QP protection domain. Not valid (reserved) in EE context. */ + pseudo_bit_t reserved13[0x00008]; +/* -------------- */ + pseudo_bit_t wqe_base_adr_h[0x00020];/* Bits 63:32 of WQE address for both SQ and RQ. + Reserved for EE context. */ +/* -------------- */ + pseudo_bit_t wqe_lkey[0x00020]; /* memory key (L-Key) to be used to access WQEs. Not valid (reserved) in EE context. */ +/* -------------- */ + pseudo_bit_t reserved14[0x00003]; + pseudo_bit_t ssc[0x00001]; /* Send Signaled Completion + 1 - all send WQEs generate CQEs. + 0 - only send WQEs with C bit set generate completion. + Not valid (reserved) in EE context. */ + pseudo_bit_t sic[0x00001]; /* If set - Ignore end to end credits on send queue. Not valid (reserved) in EE context. */ + pseudo_bit_t cur_retry_cnt[0x00003];/* Current transport retry counter (QUERY_QPEE only). + The current transport retry counter can vary from retry_count down to 1, where 1 means that the last retry attempt is currently executing. */ + pseudo_bit_t cur_rnr_retry[0x00003];/* Current RNR retry counter (QUERY_QPEE only). + The current RNR retry counter can vary from rnr_retry to 1, where 1 means that the last retry attempt is currently executing. */ + pseudo_bit_t fre[0x00001]; /* Fast Registration Work Request Enabled. (Reserved for EE) */ + pseudo_bit_t reserved15[0x00001]; + pseudo_bit_t sae[0x00001]; /* If set - Atomic operations enabled on send queue. Not valid (reserved) in EE context. */ + pseudo_bit_t swe[0x00001]; /* If set - RDMA - write enabled on send queue. Not valid (reserved) in EE context. 
*/ + pseudo_bit_t sre[0x00001]; /* If set - RDMA - read enabled on send queue. Not valid (reserved) in EE context. */ + pseudo_bit_t retry_count[0x00003]; /* Transport timeout Retry count */ + pseudo_bit_t reserved16[0x00002]; + pseudo_bit_t sra_max[0x00003]; /* Maximum number of outstanding RDMA-read/Atomic operations allowed in the send queue. Maximum number is 2^SRA_Max. Must be zero in EE context. */ + pseudo_bit_t flight_lim[0x00004]; /* Number of outstanding (in-flight) messages on the wire allowed for this send queue. + Number of outstanding messages is 2^Flight_Lim. + Use 0xF for unlimited number of outstanding messages. */ + pseudo_bit_t ack_req_freq[0x00004]; /* ACK required frequency. ACK required bit will be set in every 2^AckReqFreq packets at least. Not valid for RD QP. */ +/* -------------- */ + pseudo_bit_t reserved17[0x00020]; +/* -------------- */ + pseudo_bit_t next_send_psn[0x00018];/* Next PSN to be sent */ + pseudo_bit_t reserved18[0x00008]; +/* -------------- */ + pseudo_bit_t cqn_snd[0x00018]; /* CQ number completions from the send queue to be reported to. Not valid (reserved) in EE context. */ + pseudo_bit_t reserved19[0x00008]; +/* -------------- */ + pseudo_bit_t reserved20[0x00006]; + pseudo_bit_t snd_wqe_base_adr_l[0x0001a];/* While opening (creating) the WQ, this field should contain the address of first descriptor to be posted. Not valid (reserved) in EE context. */ +/* -------------- */ + pseudo_bit_t snd_db_record_index[0x00020];/* Index in the UAR Context Table Entry. + HW uses this index as an offset from the UAR Context Table Entry in order to read this SQ doorbell record. + The entry is obtained via the usr_page field. + Not valid for EE. */ +/* -------------- */ + pseudo_bit_t last_acked_psn[0x00018];/* The last acknowledged PSN for the requester (QUERY_QPEE only) */ + pseudo_bit_t reserved21[0x00008]; +/* -------------- */ + pseudo_bit_t ssn[0x00018]; /* Requester Send Sequence Number (QUERY_QPEE only) */ + pseudo_bit_t reserved22[0x00008]; +/* -------------- */ + pseudo_bit_t reserved23[0x00003]; + pseudo_bit_t rsc[0x00001]; /* 1 - all receive WQEs generate CQEs. + 0 - only receive WQEs with C bit set generate completion. + Not valid (reserved) in EE context. + */ + pseudo_bit_t ric[0x00001]; /* Invalid Credits. + 1 - place "Invalid Credits" to ACKs sent from this queue. + 0 - ACKs report the actual number of end to end credits on the connection. + Not valid (reserved) in EE context. + Must be set to 1 on QPs which are attached to SRQ. */ + pseudo_bit_t reserved24[0x00008]; + pseudo_bit_t rae[0x00001]; /* If set - Atomic operations enabled. on receive queue. Not valid (reserved) in EE context. */ + pseudo_bit_t rwe[0x00001]; /* If set - RDMA - write enabled on receive queue. Not valid (reserved) in EE context. */ + pseudo_bit_t rre[0x00001]; /* If set - RDMA - read enabled on receive queue. Not valid (reserved) in EE context. */ + pseudo_bit_t reserved25[0x00005]; + pseudo_bit_t rra_max[0x00003]; /* Maximum number of outstanding RDMA-read/Atomic operations allowed on receive queue is 2^RRA_Max. + Must be 0 for EE context. */ + pseudo_bit_t reserved26[0x00008]; +/* -------------- */ + pseudo_bit_t next_rcv_psn[0x00018]; /* Next (expected) PSN on receive */ + pseudo_bit_t min_rnr_nak[0x00005]; /* Minimum RNR NAK timer value (TTTTT field encoding according to the IB spec Vol1 9.7.5.2.8). + Not valid (reserved) in EE context. 
*/ + pseudo_bit_t reserved27[0x00003]; +/* -------------- */ + pseudo_bit_t reserved28[0x00005]; + pseudo_bit_t ra_buff_indx[0x0001b]; /* Index to outstanding read/atomic buffer. + This field constructs the address to the RDB for maintaining the incoming RDMA read and atomic requests. */ +/* -------------- */ + pseudo_bit_t cqn_rcv[0x00018]; /* CQ number completions from receive queue to be reported to. Not valid (reserved) in EE context. */ + pseudo_bit_t reserved29[0x00008]; +/* -------------- */ + pseudo_bit_t reserved30[0x00006]; + pseudo_bit_t rcv_wqe_base_adr_l[0x0001a];/* While opening (creating) the WQ, this field should contain the address of first descriptor to be posted. Not valid (reserved) in EE context. */ +/* -------------- */ + pseudo_bit_t rcv_db_record_index[0x00020];/* Index in the UAR Context Table Entry containing the doorbell record for the receive queue. + HW uses this index as an offset from the UAR Context Table Entry in order to read this RQ doorbell record. + The entry is obtained via the usr_page field. + Not valid for EE. */ +/* -------------- */ + pseudo_bit_t q_key[0x00020]; /* Q_Key to be validated against received datagrams. + On send datagrams, if Q_Key[31] specified in the WQE is set, then this Q_Key will be transmitted in the outgoing message. + Not valid (reserved) in EE context. */ +/* -------------- */ + pseudo_bit_t srqn[0x00018]; /* SRQN - Shared Receive Queue Number - specifies the SRQ number from which the QP dequeues receive descriptors. + SRQN is valid only if SRQ bit is set. Not valid (reserved) in EE context. */ + pseudo_bit_t srq[0x00001]; /* SRQ - Shared Receive Queue. If this bit is set, then the QP is associated with a SRQ. Not valid (reserved) in EE context. */ + pseudo_bit_t reserved31[0x00007]; +/* -------------- */ + pseudo_bit_t rmsn[0x00018]; /* Responder current message sequence number (QUERY_QPEE only) */ + pseudo_bit_t reserved32[0x00008]; +/* -------------- */ + pseudo_bit_t sq_wqe_counter[0x00010];/* A 16bits counter that is incremented for each WQE posted to the SQ. + Must be 0x0 in SQ initialization. + (QUERY_QPEE only). */ + pseudo_bit_t rq_wqe_counter[0x00010];/* A 16bits counter that is incremented for each WQE posted to the RQ. + Must be 0x0 in RQ initialization. + (QUERY_QPEE only). */ +/* -------------- */ + pseudo_bit_t reserved33[0x00040]; +/* -------------- */ +}; + +/* Clear Interrupt [63:0] */ + +struct arbelprm_clr_int_st { /* Little Endian */ + pseudo_bit_t clr_int_h[0x00020]; /* Clear Interrupt [63:32] + Write transactions to this register will clear (de-assert) the virtual interrupt output pins of InfiniHost-III-EX. The value to be written in this register is obtained by executing QUERY_ADAPTER command on command interface after system boot. + This register is write-only. Reading from this register will cause undefined result + */ +/* -------------- */ + pseudo_bit_t clr_int_l[0x00020]; /* Clear Interrupt [31:0] + Write transactions to this register will clear (de-assert) the virtual interrupt output pins of InfiniHost-III-EX. The value to be written in this register is obtained by executing QUERY_ADAPTER command on command interface after system boot. + This register is write-only. Reading from this register will cause undefined result */ +/* -------------- */ +}; + +/* EQ_Arm_DB_Region */ + +struct arbelprm_eq_arm_db_region_st { /* Little Endian */ + pseudo_bit_t eq_x_arm_h[0x00020]; /* EQ[63:32] X state. + This register is used to Arm EQs when setting the appropriate bits. 
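Following the description above (one arm bit per EQ, split across two 32-bit halves), a minimal sketch of selecting that bit: for EQ number eqn, set bit (eqn - 32) in eq_x_arm_h when eqn >= 32, or bit eqn in eq_x_arm_l otherwise. The doorbell write itself (target pointer and any byte-order conversion) is left out, since it is not specified here.

    // Returns the mask to write into the selected half of the EQ arm region.
    static inline __u32 eq_arm_mask(int eqn)
    {
            return 1U << (eqn & 31);        // same bit position in either half
    }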
*/ +/* -------------- */ + pseudo_bit_t eq_x_arm_l[0x00020]; /* EQ[31:0] X state. + This register is used to Arm EQs when setting the appropriate bits. */ +/* -------------- */ +}; + +/* EQ Set CI DBs Table */ + +struct arbelprm_eq_set_ci_table_st { /* Little Endian */ + pseudo_bit_t eq0_set_ci[0x00020]; /* EQ0_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved0[0x00020]; +/* -------------- */ + pseudo_bit_t eq1_set_ci[0x00020]; /* EQ1_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved1[0x00020]; +/* -------------- */ + pseudo_bit_t eq2_set_ci[0x00020]; /* EQ2_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved2[0x00020]; +/* -------------- */ + pseudo_bit_t eq3_set_ci[0x00020]; /* EQ3_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved3[0x00020]; +/* -------------- */ + pseudo_bit_t eq4_set_ci[0x00020]; /* EQ4_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved4[0x00020]; +/* -------------- */ + pseudo_bit_t eq5_set_ci[0x00020]; /* EQ5_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved5[0x00020]; +/* -------------- */ + pseudo_bit_t eq6_set_ci[0x00020]; /* EQ6_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved6[0x00020]; +/* -------------- */ + pseudo_bit_t eq7_set_ci[0x00020]; /* EQ7_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved7[0x00020]; +/* -------------- */ + pseudo_bit_t eq8_set_ci[0x00020]; /* EQ8_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved8[0x00020]; +/* -------------- */ + pseudo_bit_t eq9_set_ci[0x00020]; /* EQ9_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved9[0x00020]; +/* -------------- */ + pseudo_bit_t eq10_set_ci[0x00020]; /* EQ10_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved10[0x00020]; +/* -------------- */ + pseudo_bit_t eq11_set_ci[0x00020]; /* EQ11_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved11[0x00020]; +/* -------------- */ + pseudo_bit_t eq12_set_ci[0x00020]; /* EQ12_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved12[0x00020]; +/* -------------- */ + pseudo_bit_t eq13_set_ci[0x00020]; /* EQ13_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved13[0x00020]; +/* -------------- */ + pseudo_bit_t eq14_set_ci[0x00020]; /* EQ14_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved14[0x00020]; +/* -------------- */ + pseudo_bit_t eq15_set_ci[0x00020]; /* EQ15_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved15[0x00020]; +/* -------------- */ + pseudo_bit_t eq16_set_ci[0x00020]; /* EQ16_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved16[0x00020]; +/* -------------- */ + pseudo_bit_t eq17_set_ci[0x00020]; /* EQ17_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved17[0x00020]; +/* -------------- */ + pseudo_bit_t eq18_set_ci[0x00020]; /* EQ18_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved18[0x00020]; +/* -------------- */ + pseudo_bit_t eq19_set_ci[0x00020]; /* EQ19_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved19[0x00020]; +/* -------------- */ + pseudo_bit_t eq20_set_ci[0x00020]; /* EQ20_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved20[0x00020]; +/* -------------- */ + pseudo_bit_t eq21_set_ci[0x00020]; /* EQ21_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved21[0x00020]; +/* -------------- */ + pseudo_bit_t eq22_set_ci[0x00020]; /* EQ22_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved22[0x00020]; +/* -------------- */ + pseudo_bit_t eq23_set_ci[0x00020]; /* EQ23_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved23[0x00020]; +/* -------------- */ + pseudo_bit_t eq24_set_ci[0x00020]; /* EQ24_Set_CI */ +/* -------------- 
*/ + pseudo_bit_t reserved24[0x00020]; +/* -------------- */ + pseudo_bit_t eq25_set_ci[0x00020]; /* EQ25_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved25[0x00020]; +/* -------------- */ + pseudo_bit_t eq26_set_ci[0x00020]; /* EQ26_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved26[0x00020]; +/* -------------- */ + pseudo_bit_t eq27_set_ci[0x00020]; /* EQ27_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved27[0x00020]; +/* -------------- */ + pseudo_bit_t eq28_set_ci[0x00020]; /* EQ28_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved28[0x00020]; +/* -------------- */ + pseudo_bit_t eq29_set_ci[0x00020]; /* EQ29_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved29[0x00020]; +/* -------------- */ + pseudo_bit_t eq30_set_ci[0x00020]; /* EQ30_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved30[0x00020]; +/* -------------- */ + pseudo_bit_t eq31_set_ci[0x00020]; /* EQ31_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved31[0x00020]; +/* -------------- */ + pseudo_bit_t eq32_set_ci[0x00020]; /* EQ32_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved32[0x00020]; +/* -------------- */ + pseudo_bit_t eq33_set_ci[0x00020]; /* EQ33_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved33[0x00020]; +/* -------------- */ + pseudo_bit_t eq34_set_ci[0x00020]; /* EQ34_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved34[0x00020]; +/* -------------- */ + pseudo_bit_t eq35_set_ci[0x00020]; /* EQ35_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved35[0x00020]; +/* -------------- */ + pseudo_bit_t eq36_set_ci[0x00020]; /* EQ36_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved36[0x00020]; +/* -------------- */ + pseudo_bit_t eq37_set_ci[0x00020]; /* EQ37_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved37[0x00020]; +/* -------------- */ + pseudo_bit_t eq38_set_ci[0x00020]; /* EQ38_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved38[0x00020]; +/* -------------- */ + pseudo_bit_t eq39_set_ci[0x00020]; /* EQ39_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved39[0x00020]; +/* -------------- */ + pseudo_bit_t eq40_set_ci[0x00020]; /* EQ40_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved40[0x00020]; +/* -------------- */ + pseudo_bit_t eq41_set_ci[0x00020]; /* EQ41_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved41[0x00020]; +/* -------------- */ + pseudo_bit_t eq42_set_ci[0x00020]; /* EQ42_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved42[0x00020]; +/* -------------- */ + pseudo_bit_t eq43_set_ci[0x00020]; /* EQ43_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved43[0x00020]; +/* -------------- */ + pseudo_bit_t eq44_set_ci[0x00020]; /* EQ44_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved44[0x00020]; +/* -------------- */ + pseudo_bit_t eq45_set_ci[0x00020]; /* EQ45_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved45[0x00020]; +/* -------------- */ + pseudo_bit_t eq46_set_ci[0x00020]; /* EQ46_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved46[0x00020]; +/* -------------- */ + pseudo_bit_t eq47_set_ci[0x00020]; /* EQ47_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved47[0x00020]; +/* -------------- */ + pseudo_bit_t eq48_set_ci[0x00020]; /* EQ48_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved48[0x00020]; +/* -------------- */ + pseudo_bit_t eq49_set_ci[0x00020]; /* EQ49_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved49[0x00020]; +/* -------------- */ + pseudo_bit_t eq50_set_ci[0x00020]; /* EQ50_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved50[0x00020]; +/* 
-------------- */ + pseudo_bit_t eq51_set_ci[0x00020]; /* EQ51_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved51[0x00020]; +/* -------------- */ + pseudo_bit_t eq52_set_ci[0x00020]; /* EQ52_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved52[0x00020]; +/* -------------- */ + pseudo_bit_t eq53_set_ci[0x00020]; /* EQ53_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved53[0x00020]; +/* -------------- */ + pseudo_bit_t eq54_set_ci[0x00020]; /* EQ54_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved54[0x00020]; +/* -------------- */ + pseudo_bit_t eq55_set_ci[0x00020]; /* EQ55_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved55[0x00020]; +/* -------------- */ + pseudo_bit_t eq56_set_ci[0x00020]; /* EQ56_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved56[0x00020]; +/* -------------- */ + pseudo_bit_t eq57_set_ci[0x00020]; /* EQ57_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved57[0x00020]; +/* -------------- */ + pseudo_bit_t eq58_set_ci[0x00020]; /* EQ58_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved58[0x00020]; +/* -------------- */ + pseudo_bit_t eq59_set_ci[0x00020]; /* EQ59_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved59[0x00020]; +/* -------------- */ + pseudo_bit_t eq60_set_ci[0x00020]; /* EQ60_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved60[0x00020]; +/* -------------- */ + pseudo_bit_t eq61_set_ci[0x00020]; /* EQ61_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved61[0x00020]; +/* -------------- */ + pseudo_bit_t eq62_set_ci[0x00020]; /* EQ62_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved62[0x00020]; +/* -------------- */ + pseudo_bit_t eq63_set_ci[0x00020]; /* EQ63_Set_CI */ +/* -------------- */ + pseudo_bit_t reserved63[0x00020]; +/* -------------- */ +}; + +/* InfiniHost-III-EX Configuration Registers */ + +struct arbelprm_configuration_registers_st { /* Little Endian */ + pseudo_bit_t reserved0[0x403400]; +/* -------------- */ + struct arbelprm_hca_command_register_st hca_command_interface_register;/* HCA Command Register */ +/* -------------- */ + pseudo_bit_t reserved1[0x3fcb20]; +/* -------------- */ +}; + +/* QP_DB_Record */ + +struct arbelprm_qp_db_record_st { /* Little Endian */ + pseudo_bit_t counter[0x00010]; /* Modulo-64K counter of WQEs posted to the QP since its creation. Should be initialized to zero. */ + pseudo_bit_t reserved0[0x00010]; +/* -------------- */ + pseudo_bit_t reserved1[0x00005]; + pseudo_bit_t res[0x00003]; /* 0x3 for SQ + 0x4 for RQ + 0x5 for SRQ */ + pseudo_bit_t qp_number[0x00018]; /* QP number */ +/* -------------- */ +}; + +/* CQ_ARM_DB_Record */ + +struct arbelprm_cq_arm_db_record_st { /* Little Endian */ + pseudo_bit_t counter[0x00020]; /* CQ counter for the arming request */ +/* -------------- */ + pseudo_bit_t cmd[0x00003]; /* 0x0 - No command + 0x1 - Request notification for next Solicited completion event. Counter filed specifies the current CQ Consumer Counter. + 0x2 - Request notification for next Solicited or Unsolicited completion event. Counter filed specifies the current CQ Consumer counter. + 0x3 - Request notification for multiple completions (Arm-N). Counter filed specifies the value of the CQ Index that when reached by HW (i.e. 
HW generates a CQE into this Index) Event will be generated + Other - Reserved */ + pseudo_bit_t cmd_sn[0x00002]; /* Command Sequence Number - See Table 35, "CQ Doorbell Layout" for definition of this filed */ + pseudo_bit_t res[0x00003]; /* Must be 0x2 */ + pseudo_bit_t cq_number[0x00018]; /* CQ number */ +/* -------------- */ +}; + +/* CQ_CI_DB_Record */ + +struct arbelprm_cq_ci_db_record_st { /* Little Endian */ + pseudo_bit_t counter[0x00020]; /* CQ counter */ +/* -------------- */ + pseudo_bit_t reserved0[0x00005]; + pseudo_bit_t res[0x00003]; /* Must be 0x1 */ + pseudo_bit_t cq_number[0x00018]; /* CQ number */ +/* -------------- */ +}; + +/* Virtual_Physical_Mapping */ + +struct arbelprm_virtual_physical_mapping_st { /* Little Endian */ + pseudo_bit_t va_h[0x00020]; /* Virtual Address[63:32]. Valid only for MAP_ICM command. */ +/* -------------- */ + pseudo_bit_t reserved0[0x0000c]; + pseudo_bit_t va_l[0x00014]; /* Virtual Address[31:12]. Valid only for MAP_ICM command. */ +/* -------------- */ + pseudo_bit_t pa_h[0x00020]; /* Physical Address[63:32] */ +/* -------------- */ + pseudo_bit_t log2size[0x00006]; /* Log2 of the size in 4KB pages of the physical and virtual contiguous memory that starts at PA_L/H and VA_L/H */ + pseudo_bit_t reserved1[0x00006]; + pseudo_bit_t pa_l[0x00014]; /* Physical Address[31:12] */ +/* -------------- */ +}; + +/* MOD_STAT_CFG */ + +struct arbelprm_mod_stat_cfg_st { /* Little Endian */ + pseudo_bit_t log_max_srqs[0x00005]; /* Log (base 2) of the number of SRQs to allocate (0 if no SRQs are required), valid only if srq bit is set. */ + pseudo_bit_t reserved0[0x00001]; + pseudo_bit_t srq[0x00001]; /* When set SRQs are supported */ + pseudo_bit_t srq_m[0x00001]; /* Modify SRQ parameters */ + pseudo_bit_t reserved1[0x00018]; +/* -------------- */ + pseudo_bit_t reserved2[0x007e0]; +/* -------------- */ +}; + +/* SRQ Context */ + +struct arbelprm_srq_context_st { /* Little Endian */ + pseudo_bit_t srqn[0x00018]; /* SRQ number */ + pseudo_bit_t log_srq_size[0x00004]; /* Log2 of the Number of WQEs in the Receive Queue. + Maximum value is 0x10, i.e. 16M WQEs. */ + pseudo_bit_t state[0x00004]; /* SRQ State: + 1111 - SW Ownership + 0000 - HW Ownership + 0001 - Error + Valid only on QUERY_SRQ and HW2SW_SRQ commands. */ +/* -------------- */ + pseudo_bit_t l_key[0x00020]; /* memory key (L-Key) to be used to access WQEs. */ +/* -------------- */ + pseudo_bit_t srq_db_record_index[0x00020];/* Index in the UAR Context Table Entry containing the doorbell record for the receive queue. + HW uses this index as an offset from the UAR Context Table Entry in order to read this SRQ doorbell record. + The entry is obtained via the usr_page field. */ +/* -------------- */ + pseudo_bit_t usr_page[0x00018]; /* Index (offset) of user page allocated for this SRQ (see "non_privileged Access to the HCA Hardware"). Not valid (reserved) in EE context. */ + pseudo_bit_t reserved0[0x00005]; + pseudo_bit_t log_rq_stride[0x00003];/* Stride (max WQE size) on the receive queue. WQ entry is 16*(2^log_RQ_stride) bytes. */ +/* -------------- */ + pseudo_bit_t wqe_addr_h[0x00020]; /* Bits 63:32 of WQE address (WQE base address) */ +/* -------------- */ + pseudo_bit_t reserved1[0x00006]; + pseudo_bit_t srq_wqe_base_adr_l[0x0001a];/* While opening (creating) the SRQ, this field should contain the address of first descriptor to be posted. */ +/* -------------- */ + pseudo_bit_t pd[0x00018]; /* SRQ protection domain. 
*/ + pseudo_bit_t reserved2[0x00008]; +/* -------------- */ + pseudo_bit_t wqe_cnt[0x00010]; /* WQE count on the SRQ. + Valid only on QUERY_SRQ and HW2SW_SRQ commands. */ + pseudo_bit_t lwm[0x00010]; /* Limit Water Mark - if the LWM is not zero, and the wqe_cnt drops below LWM when a WQE is dequeued from the SRQ, then a SRQ limit event is fired and the LWM is set to zero. */ +/* -------------- */ + pseudo_bit_t srq_wqe_counter[0x00010];/* A 16bits counter that is incremented for each WQE posted to the SQ. + Must be 0x0 in SRQ initialization. + (QUERY_SRQ only). */ + pseudo_bit_t reserved3[0x00010]; +/* -------------- */ + pseudo_bit_t reserved4[0x00060]; +/* -------------- */ +}; + +/* PBL */ + +struct arbelprm_pbl_st { /* Little Endian */ + pseudo_bit_t mtt_0_h[0x00020]; /* First MTT[63:32] */ +/* -------------- */ + pseudo_bit_t mtt_0_l[0x00020]; /* First MTT[31:0] */ +/* -------------- */ + pseudo_bit_t mtt_1_h[0x00020]; /* Second MTT[63:32] */ +/* -------------- */ + pseudo_bit_t mtt_1_l[0x00020]; /* Second MTT[31:0] */ +/* -------------- */ + pseudo_bit_t mtt_2_h[0x00020]; /* Third MTT[63:32] */ +/* -------------- */ + pseudo_bit_t mtt_2_l[0x00020]; /* Third MTT[31:0] */ +/* -------------- */ + pseudo_bit_t mtt_3_h[0x00020]; /* Fourth MTT[63:32] */ +/* -------------- */ + pseudo_bit_t mtt_3_l[0x00020]; /* Fourth MTT[31:0] */ +/* -------------- */ +}; + +/* Performance Counters */ + +struct arbelprm_performance_counters_st { /* Little Endian */ + pseudo_bit_t sqpc_access_cnt[0x00020];/* SQPC cache access count */ +/* -------------- */ + pseudo_bit_t sqpc_miss_cnt[0x00020];/* SQPC cache miss count */ +/* -------------- */ + pseudo_bit_t reserved0[0x00040]; +/* -------------- */ + pseudo_bit_t rqpc_access_cnt[0x00020];/* RQPC cache access count */ +/* -------------- */ + pseudo_bit_t rqpc_miss_cnt[0x00020];/* RQPC cache miss count */ +/* -------------- */ + pseudo_bit_t reserved1[0x00040]; +/* -------------- */ + pseudo_bit_t cqc_access_cnt[0x00020];/* CQC cache access count */ +/* -------------- */ + pseudo_bit_t cqc_miss_cnt[0x00020]; /* CQC cache miss count */ +/* -------------- */ + pseudo_bit_t reserved2[0x00040]; +/* -------------- */ + pseudo_bit_t tpt_access_cnt[0x00020];/* TPT cache access count */ +/* -------------- */ + pseudo_bit_t mpt_miss_cnt[0x00020]; /* MPT cache miss count */ +/* -------------- */ + pseudo_bit_t mtt_miss_cnt[0x00020]; /* MTT cache miss count */ +/* -------------- */ + pseudo_bit_t reserved3[0x00620]; +/* -------------- */ +}; + +/* Transport and CI Error Counters */ + +struct arbelprm_transport_and_ci_error_counters_st { /* Little Endian */ + pseudo_bit_t rq_num_lle[0x00020]; /* Responder - number of local length errors */ +/* -------------- */ + pseudo_bit_t sq_num_lle[0x00020]; /* Requester - number of local length errors */ +/* -------------- */ + pseudo_bit_t rq_num_lqpoe[0x00020]; /* Responder - number local QP operation error */ +/* -------------- */ + pseudo_bit_t sq_num_lqpoe[0x00020]; /* Requester - number local QP operation error */ +/* -------------- */ + pseudo_bit_t rq_num_leeoe[0x00020]; /* Responder - number local EE operation error */ +/* -------------- */ + pseudo_bit_t sq_num_leeoe[0x00020]; /* Requester - number local EE operation error */ +/* -------------- */ + pseudo_bit_t rq_num_lpe[0x00020]; /* Responder - number of local protection errors */ +/* -------------- */ + pseudo_bit_t sq_num_lpe[0x00020]; /* Requester - number of local protection errors */ +/* -------------- */ + pseudo_bit_t rq_num_wrfe[0x00020]; /* Responder - number 
of CQEs with error. + Incremented each time a CQE with error is generated */ +/* -------------- */ + pseudo_bit_t sq_num_wrfe[0x00020]; /* Requester - number of CQEs with error. + Incremented each time a CQE with error is generated */ +/* -------------- */ + pseudo_bit_t reserved0[0x00020]; +/* -------------- */ + pseudo_bit_t sq_num_mwbe[0x00020]; /* Requester - number of memory window bind errors */ +/* -------------- */ + pseudo_bit_t reserved1[0x00020]; +/* -------------- */ + pseudo_bit_t sq_num_bre[0x00020]; /* Requester - number of bad response errors */ +/* -------------- */ + pseudo_bit_t rq_num_lae[0x00020]; /* Responder - number of local access errors */ +/* -------------- */ + pseudo_bit_t reserved2[0x00040]; +/* -------------- */ + pseudo_bit_t sq_num_rire[0x00020]; /* Requester - number of remote invalid request errors + NAK-Invalid Request on: + 1. Unsupported OpCode: Responder detected an unsupported OpCode. + 2. Unexpected OpCode: Responder detected an error in the sequence of OpCodes, such + as a missing "Last" packet. + Note: there is no PSN error, thus this does not indicate a dropped packet. */ +/* -------------- */ + pseudo_bit_t rq_num_rire[0x00020]; /* Responder - number of remote invalid request errors. + NAK may or may not be sent. + 1. QP Async Affiliated Error: Unsupported or Reserved OpCode (RC,RD only): + Inbound request OpCode was either reserved, or was for a function not supported by this + QP. (E.g. RDMA or ATOMIC on QP not set up for this). + 2. Misaligned ATOMIC: VA does not point to an aligned address on an atomic opera-tion. + 3. Too many RDMA READ or ATOMIC Requests: There were more requests received + and not ACKed than allowed for the connection. + 4. Out of Sequence OpCode, current packet is "First" or "Only": The Responder + detected an error in the sequence of OpCodes; a missing "Last" packet + 5. Out of Sequence OpCode, current packet is not "First" or "Only": The Responder + detected an error in the sequence of OpCodes; a missing "First" packet + 6. Local Length Error: Inbound "Send" request message exceeded the responder.s avail-able + buffer space. + 7. Length error: RDMA WRITE request message contained too much or too little pay-load + data compared to the DMA length advertised in the first or only packet. + 8. Length error: Payload length was not consistent with the opcode: + a: 0 byte <= "only" <= PMTU bytes + b: ("first" or "middle") == PMTU bytes + c: 1byte <= "last" <= PMTU bytes + 9. Length error: Inbound message exceeded the size supported by the CA port. */ +/* -------------- */ + pseudo_bit_t sq_num_rae[0x00020]; /* Requester - number of remote access errors. + NAK-Remote Access Error on: + R_Key Violation: Responder detected an invalid R_Key while executing an RDMA + Request. */ +/* -------------- */ + pseudo_bit_t rq_num_rae[0x00020]; /* Responder - number of remote access errors. + R_Key Violation Responder detected an R_Key violation while executing an RDMA + request. + NAK may or may not be sent. */ +/* -------------- */ + pseudo_bit_t sq_num_roe[0x00020]; /* Requester - number of remote operation errors. + NAK-Remote Operation Error on: + Remote Operation Error: Responder encountered an error, (local to the responder), + which prevented it from completing the request. */ +/* -------------- */ + pseudo_bit_t rq_num_roe[0x00020]; /* Responder - number of remote operation errors. + NAK-Remote Operation Error on: + 1. Malformed WQE: Responder detected a malformed Receive Queue WQE while pro-cessing + the packet. + 2. 
Remote Operation Error: Responder encountered an error, (local to the responder), + which prevented it from completing the request. */ +/* -------------- */ + pseudo_bit_t sq_num_tree[0x00020]; /* Requester - number of transport retries exceeded errors */ +/* -------------- */ + pseudo_bit_t reserved3[0x00020]; +/* -------------- */ + pseudo_bit_t sq_num_rree[0x00020]; /* Requester - number of RNR nak retries exceeded errors */ +/* -------------- */ + pseudo_bit_t reserved4[0x00020]; +/* -------------- */ + pseudo_bit_t sq_num_lrdve[0x00020]; /* Requester - number of local RDD violation errors */ +/* -------------- */ + pseudo_bit_t rq_num_rirdre[0x00020];/* Responder - number of remote invalid RD request errors */ +/* -------------- */ + pseudo_bit_t reserved5[0x00040]; +/* -------------- */ + pseudo_bit_t sq_num_rabrte[0x00020];/* Requester - number of remote aborted errors */ +/* -------------- */ + pseudo_bit_t reserved6[0x00020]; +/* -------------- */ + pseudo_bit_t sq_num_ieecne[0x00020];/* Requester - number of invalid EE context number errors */ +/* -------------- */ + pseudo_bit_t reserved7[0x00020]; +/* -------------- */ + pseudo_bit_t sq_num_ieecse[0x00020];/* Requester - invalid EE context state errors */ +/* -------------- */ + pseudo_bit_t reserved8[0x00380]; +/* -------------- */ + pseudo_bit_t rq_num_oos[0x00020]; /* Responder - number of out of sequence requests received */ +/* -------------- */ + pseudo_bit_t sq_num_oos[0x00020]; /* Requester - number of out of sequence Naks received */ +/* -------------- */ + pseudo_bit_t rq_num_mce[0x00020]; /* Responder - number of bad multicast packets received */ +/* -------------- */ + pseudo_bit_t reserved9[0x00020]; +/* -------------- */ + pseudo_bit_t rq_num_rsync[0x00020]; /* Responder - number of RESYNC operations */ +/* -------------- */ + pseudo_bit_t sq_num_rsync[0x00020]; /* Requester - number of RESYNC operations */ +/* -------------- */ + pseudo_bit_t rq_num_udsdprd[0x00020];/* The number of UD packets silently discarded on the receive queue due to lack of receive descriptor. */ +/* -------------- */ + pseudo_bit_t reserved10[0x00020]; +/* -------------- */ + pseudo_bit_t rq_num_ucsdprd[0x00020];/* The number of UC packets silently discarded on the receive queue due to lack of receive descriptor. 
*/ +/* -------------- */ + pseudo_bit_t reserved11[0x003e0]; +/* -------------- */ + pseudo_bit_t num_cqovf[0x00020]; /* Number of CQ overflows */ +/* -------------- */ + pseudo_bit_t num_eqovf[0x00020]; /* Number of EQ overflows */ +/* -------------- */ + pseudo_bit_t num_baddb[0x00020]; /* Number of bad doorbells */ +/* -------------- */ + pseudo_bit_t reserved12[0x002a0]; +/* -------------- */ +}; + +/* Event_data Field - HCR Completion Event */ + +struct arbelprm_hcr_completion_event_st { /* Little Endian */ + pseudo_bit_t token[0x00010]; /* HCR Token */ + pseudo_bit_t reserved0[0x00010]; +/* -------------- */ + pseudo_bit_t reserved1[0x00020]; +/* -------------- */ + pseudo_bit_t status[0x00008]; /* HCR Status */ + pseudo_bit_t reserved2[0x00018]; +/* -------------- */ + pseudo_bit_t out_param_h[0x00020]; /* HCR Output Parameter [63:32] */ +/* -------------- */ + pseudo_bit_t out_param_l[0x00020]; /* HCR Output Parameter [31:0] */ +/* -------------- */ + pseudo_bit_t reserved3[0x00020]; +/* -------------- */ +}; + +/* Completion with Error CQE */ + +struct arbelprm_completion_with_error_st { /* Little Endian */ + pseudo_bit_t myqpn[0x00018]; /* Indicates the QP for which completion is being reported */ + pseudo_bit_t reserved0[0x00008]; +/* -------------- */ + pseudo_bit_t reserved1[0x00060]; +/* -------------- */ + pseudo_bit_t reserved2[0x00010]; + pseudo_bit_t vendor_code[0x00008]; + pseudo_bit_t syndrome[0x00008]; /* Completion with error syndrome: + 0x01 - Local Length Error + 0x02 - Local QP Operation Error + 0x03 - Local EE Context Operation Error + 0x04 - Local Protection Error + 0x05 - Work Request Flushed Error + 0x06 - Memory Window Bind Error + 0x10 - Bad Response Error + 0x11 - Local Access Error + 0x12 - Remote Invalid Request Error + 0x13 - Remote Access Error + 0x14 - Remote Operation Error + 0x15 - Transport Retry Counter Exceeded + 0x16 - RNR Retry Counter Exceeded + 0x20 - Local RDD Violation Error + 0x21 - Remote Invalid RD Request + 0x22 - Remote Aborted Error + 0x23 - Invalid EE Context Number + 0x24 - Invalid EE Context State + other - Reserved + Syndrome is defined according to the IB specification volume 1. For detailed explanation of the syndromes, refer to chapters 10-11 of the IB specification rev 1.1. */ +/* -------------- */ + pseudo_bit_t reserved3[0x00020]; +/* -------------- */ + pseudo_bit_t reserved4[0x00006]; + pseudo_bit_t wqe_addr[0x0001a]; /* Bits 31:6 of WQE virtual address completion is reported for. The 6 least significant bits are zero. */ +/* -------------- */ + pseudo_bit_t reserved5[0x00007]; + pseudo_bit_t owner[0x00001]; /* Owner field. Zero value of this field means SW ownership of CQE. */ + pseudo_bit_t reserved6[0x00010]; + pseudo_bit_t opcode[0x00008]; /* The opcode of WQE completion is reported for. + + The following values are reported in case of completion with error: + 0xFE - For completion with error on Receive Queues + 0xFF - For completion with error on Send Queues */ +/* -------------- */ +}; + +/* Resize CQ Input Mailbox */ + +struct arbelprm_resize_cq_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00020]; +/* -------------- */ + pseudo_bit_t start_addr_h[0x00020]; /* Start address of CQ[63:32]. + Must be aligned on CQE size (32 bytes) */ +/* -------------- */ + pseudo_bit_t start_addr_l[0x00020]; /* Start address of CQ[31:0]. 
+ Must be aligned on CQE size (32 bytes) */ +/* -------------- */ + pseudo_bit_t reserved1[0x00018]; + pseudo_bit_t log_cq_size[0x00005]; /* Log (base 2) of the CQ size (in entries) */ + pseudo_bit_t reserved2[0x00003]; +/* -------------- */ + pseudo_bit_t reserved3[0x00060]; +/* -------------- */ + pseudo_bit_t l_key[0x00020]; /* Memory key (L_Key) to be used to access CQ */ +/* -------------- */ + pseudo_bit_t reserved4[0x00100]; +/* -------------- */ +}; + +/* MAD_IFC Input Modifier */ + +struct arbelprm_mad_ifc_input_modifier_st { /* Little Endian */ + pseudo_bit_t port_number[0x00008]; /* The packet reception port number (1 or 2). */ + pseudo_bit_t mad_extended_info[0x00001];/* Mad_Extended_Info valid bit (MAD_IFC Input Mailbox data from offset 00100h and down). MAD_Extended_Info is read only if this bit is set. + Required for trap generation when BKey check is enabled and for global routed packets. */ + pseudo_bit_t reserved0[0x00007]; + pseudo_bit_t rlid[0x00010]; /* Remote (source) LID from the received MAD. + This field is required for trap generation upon MKey/BKey validation. */ +/* -------------- */ +}; + +/* MAD_IFC Input Mailbox */ + +struct arbelprm_mad_ifc_st { /* Little Endian */ + pseudo_bit_t request_mad_packet[64][0x00020];/* Request MAD Packet (256bytes) */ +/* -------------- */ + pseudo_bit_t my_qpn[0x00018]; /* Destination QP number from the received MAD. + This field is reserved if Mad_extended_info indication in the input modifier is clear. */ + pseudo_bit_t reserved0[0x00008]; +/* -------------- */ + pseudo_bit_t rqpn[0x00018]; /* Remote (source) QP number from the received MAD. + This field is reserved if Mad_extended_info indication in the input modifier is clear. */ + pseudo_bit_t reserved1[0x00008]; +/* -------------- */ + pseudo_bit_t rlid[0x00010]; /* Remote (source) LID from the received MAD. + This field is reserved if Mad_extended_info indication in the input modifier is clear. */ + pseudo_bit_t ml_path[0x00007]; /* My (destination) LID path bits from the received MAD. + This field is reserved if Mad_extended_info indication in the input modifier is clear. */ + pseudo_bit_t g[0x00001]; /* If set, the GRH field in valid. + This field is reserved if Mad_extended_info indication in the input modifier is clear. */ + pseudo_bit_t reserved2[0x00004]; + pseudo_bit_t sl[0x00004]; /* Service Level of the received MAD. + This field is reserved if Mad_extended_info indication in the input modifier is clear. */ +/* -------------- */ + pseudo_bit_t pkey_indx[0x00010]; /* Index in PKey table that matches PKey of the received MAD. + This field is reserved if Mad_extended_info indication in the input modifier is clear. */ + pseudo_bit_t reserved3[0x00010]; +/* -------------- */ + pseudo_bit_t reserved4[0x00180]; +/* -------------- */ + pseudo_bit_t grh[10][0x00020]; /* The GRH field of the MAD packet that was scattered to the first 40 bytes pointed to by the scatter list. + Valid if Mad_extended_info bit (in the input modifier) and g bit are set. + Otherwise this field is reserved. */ +/* -------------- */ + pseudo_bit_t reserved5[0x004c0]; +/* -------------- */ +}; + +/* Query Debug Message */ + +struct arbelprm_query_debug_msg_st { /* Little Endian */ + pseudo_bit_t phy_addr_h[0x00020]; /* Translation of the address in firmware area. High 32 bits. */ +/* -------------- */ + pseudo_bit_t v[0x00001]; /* Physical translation is valid */ + pseudo_bit_t reserved0[0x0000b]; + pseudo_bit_t phy_addr_l[0x00014]; /* Translation of the address in firmware area. Low 32 bits. 
*/ +/* -------------- */ + pseudo_bit_t fw_area_base[0x00020]; /* Firmware area base address. The format strings and the trace buffers may be located starting from this address. */ +/* -------------- */ + pseudo_bit_t fw_area_size[0x00020]; /* Firmware area size */ +/* -------------- */ + pseudo_bit_t trc_hdr_sz[0x00020]; /* Trace message header size in dwords. */ +/* -------------- */ + pseudo_bit_t trc_arg_num[0x00020]; /* The number of arguments per trace message. */ +/* -------------- */ + pseudo_bit_t reserved1[0x000c0]; +/* -------------- */ + pseudo_bit_t dbg_msk_h[0x00020]; /* Debug messages mask [63:32] */ +/* -------------- */ + pseudo_bit_t dbg_msk_l[0x00020]; /* Debug messages mask [31:0] */ +/* -------------- */ + pseudo_bit_t reserved2[0x00040]; +/* -------------- */ + pseudo_bit_t buff0_addr[0x00020]; /* Address in firmware area of Trace Buffer 0 */ +/* -------------- */ + pseudo_bit_t buff0_size[0x00020]; /* Size of Trace Buffer 0 */ +/* -------------- */ + pseudo_bit_t buff1_addr[0x00020]; /* Address in firmware area of Trace Buffer 1 */ +/* -------------- */ + pseudo_bit_t buff1_size[0x00020]; /* Size of Trace Buffer 1 */ +/* -------------- */ + pseudo_bit_t buff2_addr[0x00020]; /* Address in firmware area of Trace Buffer 2 */ +/* -------------- */ + pseudo_bit_t buff2_size[0x00020]; /* Size of Trace Buffer 2 */ +/* -------------- */ + pseudo_bit_t buff3_addr[0x00020]; /* Address in firmware area of Trace Buffer 3 */ +/* -------------- */ + pseudo_bit_t buff3_size[0x00020]; /* Size of Trace Buffer 3 */ +/* -------------- */ + pseudo_bit_t buff4_addr[0x00020]; /* Address in firmware area of Trace Buffer 4 */ +/* -------------- */ + pseudo_bit_t buff4_size[0x00020]; /* Size of Trace Buffer 4 */ +/* -------------- */ + pseudo_bit_t buff5_addr[0x00020]; /* Address in firmware area of Trace Buffer 5 */ +/* -------------- */ + pseudo_bit_t buff5_size[0x00020]; /* Size of Trace Buffer 5 */ +/* -------------- */ + pseudo_bit_t buff6_addr[0x00020]; /* Address in firmware area of Trace Buffer 6 */ +/* -------------- */ + pseudo_bit_t buff6_size[0x00020]; /* Size of Trace Buffer 6 */ +/* -------------- */ + pseudo_bit_t buff7_addr[0x00020]; /* Address in firmware area of Trace Buffer 7 */ +/* -------------- */ + pseudo_bit_t buff7_size[0x00020]; /* Size of Trace Buffer 7 */ +/* -------------- */ + pseudo_bit_t reserved3[0x00400]; +/* -------------- */ +}; + +/* User Access Region */ + +struct arbelprm_uar_st { /* Little Endian */ + struct arbelprm_rd_send_doorbell_st rd_send_doorbell;/* Reliable Datagram send doorbell */ +/* -------------- */ + struct arbelprm_send_doorbell_st send_doorbell;/* Send doorbell */ +/* -------------- */ + pseudo_bit_t reserved0[0x00040]; +/* -------------- */ + struct arbelprm_cq_cmd_doorbell_st cq_command_doorbell;/* CQ Doorbell */ +/* -------------- */ + pseudo_bit_t reserved1[0x03ec0]; +/* -------------- */ +}; + +/* Receive doorbell */ + +struct arbelprm_receive_doorbell_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00008]; + pseudo_bit_t wqe_counter[0x00010]; /* Modulo-64K counter of WQEs posted on this queue since its creation. 
Should be zero for the first doorbell on the QP */ + pseudo_bit_t reserved1[0x00008]; +/* -------------- */ + pseudo_bit_t reserved2[0x00005]; + pseudo_bit_t srq[0x00001]; /* If set, this is a Shared Receive Queue */ + pseudo_bit_t reserved3[0x00002]; + pseudo_bit_t qpn[0x00018]; /* QP number or SRQ number this doorbell is rung on */ +/* -------------- */ +}; + +/* SET_IB Parameters */ + +struct arbelprm_set_ib_st { /* Little Endian */ + pseudo_bit_t rqk[0x00001]; /* Reset QKey Violation Counter */ + pseudo_bit_t reserved0[0x00011]; + pseudo_bit_t sig[0x00001]; /* Set System Image GUID to system_image_guid specified. + system_image_guid and sig must be the same for all ports. */ + pseudo_bit_t reserved1[0x0000d]; +/* -------------- */ + pseudo_bit_t capability_mask[0x00020];/* PortInfo Capability Mask */ +/* -------------- */ + pseudo_bit_t system_image_guid_h[0x00020];/* System Image GUID[63:32], takes effect only if the SIG bit is set + Must be the same for both ports. */ +/* -------------- */ + pseudo_bit_t system_image_guid_l[0x00020];/* System Image GUID[31:0], takes effect only if the SIG bit is set + Must be the same for both ports. */ +/* -------------- */ + pseudo_bit_t reserved2[0x00180]; +/* -------------- */ +}; + +/* Multicast Group Member */ + +struct arbelprm_mgm_entry_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00006]; + pseudo_bit_t next_gid_index[0x0001a];/* Index of next Multicast Group Member whose GID maps to same MGID_HASH number. + The index is into the Multicast Group Table, which is the comprised the MGHT and AMGM tables. + next_gid_index=0 means end of the chain. */ +/* -------------- */ + pseudo_bit_t reserved1[0x00060]; +/* -------------- */ + pseudo_bit_t mgid_128_96[0x00020]; /* Multicast group GID[128:96] in big endian format. + Use the Reserved GID 0:0:0:0:0:0:0:0 for an invalid entry. */ +/* -------------- */ + pseudo_bit_t mgid_95_64[0x00020]; /* Multicast group GID[95:64] in big endian format. + Use the Reserved GID 0:0:0:0:0:0:0:0 for an invalid entry. */ +/* -------------- */ + pseudo_bit_t mgid_63_32[0x00020]; /* Multicast group GID[63:32] in big endian format. + Use the Reserved GID 0:0:0:0:0:0:0:0 for an invalid entry. */ +/* -------------- */ + pseudo_bit_t mgid_31_0[0x00020]; /* Multicast group GID[31:0] in big endian format. + Use the Reserved GID 0:0:0:0:0:0:0:0 for an invalid entry. */ +/* -------------- */ + struct arbelprm_mgmqp_st mgmqp_0; /* Multicast Group Member QP */ +/* -------------- */ + struct arbelprm_mgmqp_st mgmqp_1; /* Multicast Group Member QP */ +/* -------------- */ + struct arbelprm_mgmqp_st mgmqp_2; /* Multicast Group Member QP */ +/* -------------- */ + struct arbelprm_mgmqp_st mgmqp_3; /* Multicast Group Member QP */ +/* -------------- */ + struct arbelprm_mgmqp_st mgmqp_4; /* Multicast Group Member QP */ +/* -------------- */ + struct arbelprm_mgmqp_st mgmqp_5; /* Multicast Group Member QP */ +/* -------------- */ + struct arbelprm_mgmqp_st mgmqp_6; /* Multicast Group Member QP */ +/* -------------- */ + struct arbelprm_mgmqp_st mgmqp_7; /* Multicast Group Member QP */ +/* -------------- */ +}; + +/* INIT_IB Parameters */ + +struct arbelprm_init_ib_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00004]; + pseudo_bit_t vl_cap[0x00004]; /* Maximum VLs supported on the port, excluding VL15. + Legal values are 1,2,4 and 8. 
*/ + pseudo_bit_t port_width_cap[0x00004];/* IB Port Width + 1 - 1x + 3 - 1x, 4x + 11 - 1x, 4x or 12x (must not be used in InfiniHost-III-EX MT25208) + else - Reserved */ + pseudo_bit_t mtu_cap[0x00004]; /* Maximum MTU Supported + 0x0 - Reserved + 0x1 - 256 + 0x2 - 512 + 0x3 - 1024 + 0x4 - 2048 + 0x5 - 0xF Reserved */ + pseudo_bit_t g0[0x00001]; /* Set port GUID0 to GUID0 specified */ + pseudo_bit_t ng[0x00001]; /* Set node GUID to node_guid specified. + node_guid and ng must be the same for all ports. */ + pseudo_bit_t sig[0x00001]; /* Set System Image GUID to system_image_guid specified. + system_image_guid and sig must be the same for all ports. */ + pseudo_bit_t reserved1[0x0000d]; +/* -------------- */ + pseudo_bit_t max_gid[0x00010]; /* Maximum number of GIDs for the port */ + pseudo_bit_t reserved2[0x00010]; +/* -------------- */ + pseudo_bit_t max_pkey[0x00010]; /* Maximum pkeys for the port. + Must be the same for both ports. */ + pseudo_bit_t reserved3[0x00010]; +/* -------------- */ + pseudo_bit_t reserved4[0x00020]; +/* -------------- */ + pseudo_bit_t guid0_h[0x00020]; /* EUI-64 GUID assigned by the manufacturer, takes effect only if the G0 bit is set (bits 63:32) */ +/* -------------- */ + pseudo_bit_t guid0_l[0x00020]; /* EUI-64 GUID assigned by the manufacturer, takes effect only if the G0 bit is set (bits 31:0) */ +/* -------------- */ + pseudo_bit_t node_guid_h[0x00020]; /* Node GUID[63:32], takes effect only if the NG bit is set + Must be the same for both ports. */ +/* -------------- */ + pseudo_bit_t node_guid_l[0x00020]; /* Node GUID[31:0], takes effect only if the NG bit is set + Must be the same for both ports. */ +/* -------------- */ + pseudo_bit_t system_image_guid_h[0x00020];/* System Image GUID[63:32], takes effect only if the SIG bit is set + Must be the same for both ports. */ +/* -------------- */ + pseudo_bit_t system_image_guid_l[0x00020];/* System Image GUID[31:0], takes effect only if the SIG bit is set + Must be the same for both ports. */ +/* -------------- */ + pseudo_bit_t reserved5[0x006c0]; +/* -------------- */ +}; + +/* Query Device Limitations */ + +struct arbelprm_query_dev_lim_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00080]; +/* -------------- */ + pseudo_bit_t log_max_qp[0x00005]; /* Log2 of the Maximum number of QPs supported */ + pseudo_bit_t reserved1[0x00003]; + pseudo_bit_t log2_rsvd_qps[0x00004];/* Log (base 2) of the number of QPs reserved for firmware use + The reserved resources are numbered from 0 to 2^log2_rsvd_qps-1 */ + pseudo_bit_t reserved2[0x00004]; + pseudo_bit_t log_max_qp_sz[0x00008];/* The maximum number of WQEs allowed on the RQ or the SQ is 2^log_max_qp_sz-1 */ + pseudo_bit_t log_max_srq_sz[0x00008];/* The maximum number of WQEs allowed on the SRQ is 2^log_max_srq_sz-1 */ +/* -------------- */ + pseudo_bit_t log_max_ee[0x00005]; /* Log2 of the Maximum number of EE contexts supported */ + pseudo_bit_t reserved3[0x00003]; + pseudo_bit_t log2_rsvd_ees[0x00004];/* Log (base 2) of the number of EECs reserved for firmware use + The reserved resources are numbered from 0 to 2^log2_rsvd_ees-1 */ + pseudo_bit_t reserved4[0x00004]; + pseudo_bit_t log_max_srqs[0x00005]; /* Log base 2 of the maximum number of SRQs supported, valid only if SRQ bit is set. + */ + pseudo_bit_t reserved5[0x00007]; + pseudo_bit_t log2_rsvd_srqs[0x00004];/* Log (base 2) of the number of reserved SRQs for firmware use + The reserved resources are numbered from 0 to 2^log2_rsvd_srqs-1 + This parameter is valid only if the SRQ bit is set. 
*/ +/* -------------- */ + pseudo_bit_t log_max_cq[0x00005]; /* Log2 of the Maximum number of CQs supported */ + pseudo_bit_t reserved6[0x00003]; + pseudo_bit_t log2_rsvd_cqs[0x00004];/* Log (base 2) of the number of CQs reserved for firmware use + The reserved resources are numbered from 0 to 2^log2_rsrvd_cqs-1 */ + pseudo_bit_t reserved7[0x00004]; + pseudo_bit_t log_max_cq_sz[0x00008];/* Log2 of the Maximum CQEs allowed in a CQ */ + pseudo_bit_t reserved8[0x00008]; +/* -------------- */ + pseudo_bit_t log_max_eq[0x00003]; /* Log2 of the Maximum number of EQs */ + pseudo_bit_t reserved9[0x00005]; + pseudo_bit_t num_rsvd_eqs[0x00004]; /* The number of EQs reserved for firmware use + The reserved resources are numbered from 0 to num_rsvd_eqs-1 + If 0 - no resources are reserved. */ + pseudo_bit_t reserved10[0x00004]; + pseudo_bit_t log_max_mpts[0x00006]; /* Log (base 2) of the maximum number of MPT entries (the number of Regions/Windows) */ + pseudo_bit_t reserved11[0x00002]; + pseudo_bit_t log_max_eq_sz[0x00008];/* Log2 of the Maximum EQEs allowed in a EQ */ +/* -------------- */ + pseudo_bit_t log_max_mtts[0x00006]; /* Log2 of the Maximum number of MTT entries */ + pseudo_bit_t reserved12[0x00002]; + pseudo_bit_t log2_rsvd_mrws[0x00004];/* Log (base 2) of the number of MPTs reserved for firmware use + The reserved resources are numbered from 0 to 2^log2_rsvd_mrws-1 */ + pseudo_bit_t reserved13[0x00004]; + pseudo_bit_t log_max_mrw_sz[0x00008];/* Log2 of the Maximum Size of Memory Region/Window */ + pseudo_bit_t reserved14[0x00004]; + pseudo_bit_t log2_rsvd_mtts[0x00004];/* Log (base 2) of the number of MTT entries reserved for firmware use + The reserved resources are numbered from 0 to 2^log2_rsvd_mtts-1 + */ +/* -------------- */ + pseudo_bit_t reserved15[0x00020]; +/* -------------- */ + pseudo_bit_t log_max_ra_res_qp[0x00006];/* Log2 of the Maximum number of outstanding RDMA read/Atomic per QP as a responder */ + pseudo_bit_t reserved16[0x0000a]; + pseudo_bit_t log_max_ra_req_qp[0x00006];/* Log2 of the maximum number of outstanding RDMA read/Atomic per QP as a requester */ + pseudo_bit_t reserved17[0x0000a]; +/* -------------- */ + pseudo_bit_t log_max_ra_res_global[0x00006];/* Log2 of the maximum number of RDMA read/atomic operations the HCA responder can support globally. That implies the RDB table size. */ + pseudo_bit_t reserved18[0x00016]; + pseudo_bit_t log2_rsvd_rdbs[0x00004];/* Log (base 2) of the number of RDB entries reserved for firmware use + The reserved resources are numbered from 0 to 2^log2_rsvd_rdbs-1 */ +/* -------------- */ + pseudo_bit_t rsz_srq[0x00001]; /* Ability to modify the maximum number of WRs per SRQ. */ + pseudo_bit_t reserved19[0x0001f]; +/* -------------- */ + pseudo_bit_t num_ports[0x00004]; /* Number of IB ports. */ + pseudo_bit_t max_vl[0x00004]; /* Maximum VLs supported on each port, excluding VL15 */ + pseudo_bit_t max_port_width[0x00004];/* IB Port Width + 1 - 1x + 3 - 1x, 4x + 11 - 1x, 4x or 12x + else - Reserved */ + pseudo_bit_t max_mtu[0x00004]; /* Maximum MTU Supported + 0x0 - Reserved + 0x1 - 256 + 0x2 - 512 + 0x3 - 1024 + 0x4 - 2048 + 0x5 - 0xF Reserved */ + pseudo_bit_t local_ca_ack_delay[0x00005];/* The Local CA ACK Delay. This is the value recommended to be returned in Query HCA verb. + The delay value in microseconds is computed using 4.096us * 2^(local_ca_ack_delay). 
*/ + pseudo_bit_t reserved20[0x0000b]; +/* -------------- */ + pseudo_bit_t log_max_gid[0x00004]; /* Log2 of the maximum number of GIDs per port */ + pseudo_bit_t reserved21[0x0001c]; +/* -------------- */ + pseudo_bit_t log_max_pkey[0x00004]; /* Log2 of the max PKey Table Size (per IB port) */ + pseudo_bit_t reserved22[0x0000c]; + pseudo_bit_t stat_rate_support[0x00010];/* bit mask of stat rate supported + bit 0 - full bw + bit 1 - 1/4 bw + bit 2 - 1/8 bw + bit 3 - 1/2 bw; */ +/* -------------- */ + pseudo_bit_t reserved23[0x00020]; +/* -------------- */ + pseudo_bit_t rc[0x00001]; /* RC Transport supported */ + pseudo_bit_t uc[0x00001]; /* UC Transport Supported */ + pseudo_bit_t ud[0x00001]; /* UD Transport Supported */ + pseudo_bit_t rd[0x00001]; /* RD Transport Supported */ + pseudo_bit_t raw_ipv6[0x00001]; /* Raw IPv6 Transport Supported */ + pseudo_bit_t raw_ether[0x00001]; /* Raw Ethertype Transport Supported */ + pseudo_bit_t srq[0x00001]; /* SRQ is supported + */ + pseudo_bit_t ipo_ib_checksum[0x00001];/* IP over IB checksum is supported */ + pseudo_bit_t pkv[0x00001]; /* PKey Violation Counter Supported */ + pseudo_bit_t qkv[0x00001]; /* QKey Violation Coutner Supported */ + pseudo_bit_t reserved24[0x00006]; + pseudo_bit_t mw[0x00001]; /* Memory windows supported */ + pseudo_bit_t apm[0x00001]; /* Automatic Path Migration Supported */ + pseudo_bit_t atm[0x00001]; /* Atomic operations supported (atomicity is guaranteed between QPs on this HCA) */ + pseudo_bit_t rm[0x00001]; /* Raw Multicast Supported */ + pseudo_bit_t avp[0x00001]; /* Address Vector Port checking supported */ + pseudo_bit_t udm[0x00001]; /* UD Multicast Supported */ + pseudo_bit_t reserved25[0x00002]; + pseudo_bit_t pg[0x00001]; /* Paging on demand supported */ + pseudo_bit_t r[0x00001]; /* Router mode supported */ + pseudo_bit_t reserved26[0x00006]; +/* -------------- */ + pseudo_bit_t log_pg_sz[0x00008]; /* Minimum system page size supported (log2). + For proper operation it must be less than or equal the hosting platform (CPU) minimum page size. */ + pseudo_bit_t reserved27[0x00008]; + pseudo_bit_t uar_sz[0x00006]; /* UAR Area Size = 1MB * 2^uar_sz */ + pseudo_bit_t reserved28[0x00006]; + pseudo_bit_t num_rsvd_uars[0x00004];/* The number of UARs reserved for firmware use + The reserved resources are numbered from 0 to num_reserved_uars-1 + Note that UAR number num_reserved_uars is always for the kernel. */ +/* -------------- */ + pseudo_bit_t reserved29[0x00020]; +/* -------------- */ + pseudo_bit_t max_desc_sz_sq[0x00010];/* Max descriptor size in bytes for the send queue */ + pseudo_bit_t max_sg_sq[0x00008]; /* The maximum S/G list elements in a SQ WQE (max_desc_sz/16 - 3) */ + pseudo_bit_t reserved30[0x00008]; +/* -------------- */ + pseudo_bit_t max_desc_sz_rq[0x00010];/* Max descriptor size in bytes for the receive queue */ + pseudo_bit_t max_sg_rq[0x00008]; /* The maximum S/G list elements in a RQ WQE (max_desc_sz/16 - 3) */ + pseudo_bit_t reserved31[0x00008]; +/* -------------- */ + pseudo_bit_t reserved32[0x00040]; +/* -------------- */ + pseudo_bit_t log_max_mcg[0x00008]; /* Log2 of the maximum number of multicast groups */ + pseudo_bit_t num_rsvd_mcgs[0x00004];/* The number of MGMs reserved for firmware use in the MGHT. + The reserved resources are numbered from 0 to num_reserved_mcgs-1 + If 0 - no resources are reserved. 
*/ + pseudo_bit_t reserved33[0x00004]; + pseudo_bit_t log_max_qp_mcg[0x00008];/* Log2 of the maximum number of QPs per multicast group */ + pseudo_bit_t reserved34[0x00008]; +/* -------------- */ + pseudo_bit_t log_max_rdds[0x00006]; /* Log2 of the maximum number of RDDs */ + pseudo_bit_t reserved35[0x00006]; + pseudo_bit_t num_rsvd_rdds[0x00004];/* The number of RDDs reserved for firmware use + The reserved resources are numbered from 0 to num_reserved_rdds-1. + If 0 - no resources are reserved. */ + pseudo_bit_t log_max_pd[0x00006]; /* Log2 of the maximum number of PDs */ + pseudo_bit_t reserved36[0x00006]; + pseudo_bit_t num_rsvd_pds[0x00004]; /* The number of PDs reserved for firmware use + The reserved resources are numbered from 0 to num_reserved_pds-1 + If 0 - no resources are reserved. */ +/* -------------- */ + pseudo_bit_t reserved37[0x000c0]; +/* -------------- */ + pseudo_bit_t qpc_entry_sz[0x00010]; /* QPC Entry Size for the device + For the InfiniHost-III-EX MT25208 entry size is 256 bytes */ + pseudo_bit_t eec_entry_sz[0x00010]; /* EEC Entry Size for the device + For the InfiniHost-III-EX MT25208 entry size is 256 bytes */ +/* -------------- */ + pseudo_bit_t eqpc_entry_sz[0x00010];/* Extended QPC entry size for the device + For the InfiniHost-III-EX MT25208 entry size is 32 bytes */ + pseudo_bit_t eeec_entry_sz[0x00010];/* Extended EEC entry size for the device + For the InfiniHost-III-EX MT25208 entry size is 32 bytes */ +/* -------------- */ + pseudo_bit_t cqc_entry_sz[0x00010]; /* CQC entry size for the device + For the InfiniHost-III-EX MT25208 entry size is 64 bytes */ + pseudo_bit_t eqc_entry_sz[0x00010]; /* EQ context entry size for the device + For the InfiniHost-III-EX MT25208 entry size is 64 bytes */ +/* -------------- */ + pseudo_bit_t uar_scratch_entry_sz[0x00010];/* UAR Scratchpad Entry Size + For the InfiniHost-III-EX MT25208 entry size is 32 bytes */ + pseudo_bit_t srq_entry_sz[0x00010]; /* SRQ context entry size for the device + For the InfiniHost-III-EX MT25208 entry size is 32 bytes */ +/* -------------- */ + pseudo_bit_t mpt_entry_sz[0x00010]; /* MPT entry size in Bytes for the device. + For the InfiniHost-III-EX MT25208 entry size is 64 bytes */ + pseudo_bit_t mtt_entry_sz[0x00010]; /* MTT entry size in Bytes for the device. + For the InfiniHost-III-EX MT25208 entry size is 8 bytes */ +/* -------------- */ + pseudo_bit_t bmme[0x00001]; /* Base Memory Management Extension Support */ + pseudo_bit_t win_type[0x00001]; /* Bound Type 2 Memory Window Association mechanism: + 0 - Type 2A - QP Number Association; or + 1 - Type 2B - QP Number and PD Association. */ + pseudo_bit_t mps[0x00001]; /* Ability of this HCA to support multiple page sizes per Memory Region. */ + pseudo_bit_t bl[0x00001]; /* Ability of this HCA to support Block List Physical Buffer Lists. (The device does not supports Block List) */ + pseudo_bit_t zb[0x00001]; /* Zero Based region/windows supported */ + pseudo_bit_t lif[0x00001]; /* Ability of this HCA to support Local Invalidate Fencing. */ + pseudo_bit_t reserved38[0x00002]; + pseudo_bit_t log_pbl_sz[0x00006]; /* Log2 of the Maximum Physical Buffer List size in Bytes supported by this HCA when invoking the Allocate L_Key verb. + */ + pseudo_bit_t reserved39[0x00012]; +/* -------------- */ + pseudo_bit_t resd_lkey[0x00020]; /* The value of the reserved Lkey for Base Memory Management Extension */ +/* -------------- */ + pseudo_bit_t lamr[0x00001]; /* When set the device requires local attached memory in order to operate. 
+ When set, ICM pages, Firmware Area and ICM auxiliary pages must be allocated in the local attached memory. */ + pseudo_bit_t reserved40[0x0001f]; +/* -------------- */ + pseudo_bit_t max_icm_size_h[0x00020];/* Bits [63:32] of maximum ICM size InfiniHost III Ex support in bytes. */ +/* -------------- */ + pseudo_bit_t max_icm_size_l[0x00020];/* Bits [31:0] of maximum ICM size InfiniHost III Ex support in bytes. */ +/* -------------- */ + pseudo_bit_t reserved41[0x002c0]; +/* -------------- */ +}; + +/* QUERY_ADAPTER Parameters Block */ + +struct arbelprm_query_adapter_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00080]; +/* -------------- */ + pseudo_bit_t reserved1[0x00018]; + pseudo_bit_t intapin[0x00008]; /* Driver should set this field to INTR value in the event queue in order to get Express interrupt messages. */ +/* -------------- */ + pseudo_bit_t reserved2[0x00060]; +/* -------------- */ + struct arbelprm_vsd_st vsd; +/* -------------- */ +}; + +/* QUERY_FW Parameters Block */ + +struct arbelprm_query_fw_st { /* Little Endian */ + pseudo_bit_t fw_rev_major[0x00010]; /* Firmware Revision - Major */ + pseudo_bit_t fw_pages[0x00010]; /* Amount of physical memory to be allocated for FW usage is in 4KByte pages. */ +/* -------------- */ + pseudo_bit_t fw_rev_minor[0x00010]; /* Firmware Revision - Minor */ + pseudo_bit_t fw_rev_subminor[0x00010];/* Firmware Sub-minor version (Patch level). */ +/* -------------- */ + pseudo_bit_t cmd_interface_rev[0x00010];/* Command Interface Interpreter Revision ID */ + pseudo_bit_t reserved0[0x0000e]; + pseudo_bit_t wqe_h_mode[0x00001]; /* Hermon mode. If '1', then WQE and AV format is the advanced format */ + pseudo_bit_t zb_wq_cq[0x00001]; /* If '1', then ZB mode of WQ and CQ are enabled (i.e. real Memfree PRM is supported) */ +/* -------------- */ + pseudo_bit_t log_max_outstanding_cmd[0x00008];/* Log2 of the maximum number of commands the HCR can support simultaneously */ + pseudo_bit_t reserved1[0x00017]; + pseudo_bit_t dt[0x00001]; /* Debug Trace Support + 0 - Debug trace is not supported + 1 - Debug trace is supported */ +/* -------------- */ + pseudo_bit_t cmd_interface_db[0x00001];/* Set if the device accepts commands by means of special doorbells */ + pseudo_bit_t reserved2[0x0001f]; +/* -------------- */ + pseudo_bit_t reserved3[0x00060]; +/* -------------- */ + pseudo_bit_t clr_int_base_addr_h[0x00020];/* Bits [63:32] of Clear interrupt register physical address. + Points to 64 bit register. */ +/* -------------- */ + pseudo_bit_t clr_int_base_addr_l[0x00020];/* Bits [31:0] of Clear interrupt register physical address. + Points to 64 bit register. */ +/* -------------- */ + pseudo_bit_t reserved4[0x00040]; +/* -------------- */ + pseudo_bit_t error_buf_start_h[0x00020];/* Read Only buffer for catastrophic error reports (physical address) */ +/* -------------- */ + pseudo_bit_t error_buf_start_l[0x00020];/* Read Only buffer for catastrophic error reports (physical address) */ +/* -------------- */ + pseudo_bit_t error_buf_size[0x00020];/* Size in words */ +/* -------------- */ + pseudo_bit_t reserved5[0x00020]; +/* -------------- */ + pseudo_bit_t eq_arm_base_addr_h[0x00020];/* Bits [63:32] of EQ Arm DBs physical address. + Points to 64 bit register. + Setting bit x in the offset, arms EQ number x. + */ +/* -------------- */ + pseudo_bit_t eq_arm_base_addr_l[0x00020];/* Bits [31:0] of EQ Arm DBs physical address. + Points to 64 bit register. + Setting bit x in the offset, arms EQ number x. 
*/ +/* -------------- */ + pseudo_bit_t eq_set_ci_base_addr_h[0x00020];/* Bits [63:32] of EQ Set CI DBs Table physical address. + Points to a the EQ Set CI DBs Table base address. */ +/* -------------- */ + pseudo_bit_t eq_set_ci_base_addr_l[0x00020];/* Bits [31:0] of EQ Set CI DBs Table physical address. + Points to a the EQ Set CI DBs Table base address. */ +/* -------------- */ + pseudo_bit_t cmd_db_dw1[0x00010]; /* offset in bytes from cmd_db_addr_base where DWord 1 of a Command Interface Doorbell should be written. Valid only if CmdInterfaceDb bit is '1' */ + pseudo_bit_t cmd_db_dw0[0x00010]; /* offset in bytes from cmd_db_addr_base where DWord 0 of a Command Interface Doorbell should be written. Valid only if CmdInterfaceDb bit is '1' */ +/* -------------- */ + pseudo_bit_t cmd_db_dw3[0x00010]; /* offset in bytes from cmd_db_addr_base where DWord 3 of a Command Interface Doorbell should be written. Valid only if CmdInterfaceDb bit is '1' */ + pseudo_bit_t cmd_db_dw2[0x00010]; /* offset in bytes from cmd_db_addr_base where DWord 2 of a Command Interface Doorbell should be written. Valid only if CmdInterfaceDb bit is '1' */ +/* -------------- */ + pseudo_bit_t cmd_db_dw5[0x00010]; /* offset in bytes from cmd_db_addr_base where DWord 5 of a Command Interface Doorbell should be written. Valid only if CmdInterfaceDb bit is '1' */ + pseudo_bit_t cmd_db_dw4[0x00010]; /* offset in bytes from cmd_db_addr_base where DWord 4 of a Command Interface Doorbell should be written. Valid only if CmdInterfaceDb bit is '1' */ +/* -------------- */ + pseudo_bit_t cmd_db_dw7[0x00010]; /* offset in bytes from cmd_db_addr_base where DWord 7 of a Command Interface Doorbell should be written. Valid only if CmdInterfaceDb bit is '1' */ + pseudo_bit_t cmd_db_dw6[0x00010]; /* offset in bytes from cmd_db_addr_base where DWord 6 of a Command Interface Doorbell should be written. Valid only if CmdInterfaceDb bit is '1' */ +/* -------------- */ + pseudo_bit_t cmd_db_addr_base_h[0x00020];/* High bits of cmd_db_addr_base, which cmd_db_dw offsets refer to. Valid only if CmdInterfaceDb bit is '1' */ +/* -------------- */ + pseudo_bit_t cmd_db_addr_base_l[0x00020];/* Low bits of cmd_db_addr_base, which cmd_db_dw offsets refer to. Valid only if CmdInterfaceDb bit is '1' */ +/* -------------- */ + pseudo_bit_t reserved6[0x004c0]; +/* -------------- */ +}; + +/* ACCESS_LAM */ + +struct arbelprm_access_lam_st { /* Little Endian */ + struct arbelprm_access_lam_inject_errors_st access_lam_inject_errors; +/* -------------- */ + pseudo_bit_t reserved0[0x00080]; +/* -------------- */ +}; + +/* ENABLE_LAM Parameters Block */ + +struct arbelprm_enable_lam_st { /* Little Endian */ + pseudo_bit_t lam_start_adr_h[0x00020];/* LAM start address [63:32] */ +/* -------------- */ + pseudo_bit_t lam_start_adr_l[0x00020];/* LAM start address [31:0] */ +/* -------------- */ + pseudo_bit_t lam_end_adr_h[0x00020];/* LAM end address [63:32] */ +/* -------------- */ + pseudo_bit_t lam_end_adr_l[0x00020];/* LAM end address [31:0] */ +/* -------------- */ + pseudo_bit_t di[0x00002]; /* Data Integrity Configuration: + 00 - none + 01 - Parity + 10 - ECC Detection Only + 11 - ECC With Correction */ + pseudo_bit_t ap[0x00002]; /* Auto Precharge Mode + 00 - No auto precharge + 01 - Auto precharge per transaction + 10 - Auto precharge per 64 bytes + 11 - reserved */ + pseudo_bit_t dh[0x00001]; /* When set, LAM is Hidden and can not be accessed directly from the PCI bus. 
*/ + pseudo_bit_t reserved0[0x0001b]; +/* -------------- */ + pseudo_bit_t reserved1[0x00160]; +/* -------------- */ + struct arbelprm_dimminfo_st dimm0; /* Logical DIMM 0 Parameters */ +/* -------------- */ + struct arbelprm_dimminfo_st dimm1; /* Logical DIMM 1 Parameters */ +/* -------------- */ + pseudo_bit_t reserved2[0x00400]; +/* -------------- */ +}; + +/* Memory Access Parameters for UD Address Vector Table */ + +struct arbelprm_udavtable_memory_parameters_st { /* Little Endian */ + pseudo_bit_t l_key[0x00020]; /* L_Key used to access TPT */ +/* -------------- */ + pseudo_bit_t pd[0x00018]; /* PD used by TPT for matching against PD of region entry being accessed. */ + pseudo_bit_t reserved0[0x00005]; + pseudo_bit_t xlation_en[0x00001]; /* When cleared, address is physical address and no translation will be done. When set, address is virtual. */ + pseudo_bit_t reserved1[0x00002]; +/* -------------- */ +}; + +/* INIT_HCA & QUERY_HCA Parameters Block */ + +struct arbelprm_init_hca_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00060]; +/* -------------- */ + pseudo_bit_t reserved1[0x00010]; + pseudo_bit_t time_stamp_granularity[0x00008];/* This field controls the granularity in which CQE Timestamp counter is incremented. + The TimeStampGranularity units is 1/4 of a microseconds. (e.g is TimeStampGranularity is configured to 0x2, CQE Timestamp will be incremented every one microsecond) + When sets to Zero, timestamp reporting in the CQE is disabled. + This feature is currently not supported. + */ + pseudo_bit_t hca_core_clock[0x00008];/* Internal Clock Period (in units of 1/16 ns) (QUERY_HCA only) */ +/* -------------- */ + pseudo_bit_t reserved2[0x00008]; + pseudo_bit_t router_qp[0x00010]; /* Upper 16 bit to be used as a QP number for router mode. Low order 8 bits are taken from the TClass field of the incoming packet. + Valid only if RE bit is set */ + pseudo_bit_t reserved3[0x00007]; + pseudo_bit_t re[0x00001]; /* Router Mode Enable + If this bit is set, entire packet (including all headers and ICRC) will be considered as a data payload and will be scattered to memory as specified in the descriptor that is posted on the QP matching the TClass field of packet. */ +/* -------------- */ + pseudo_bit_t udp[0x00001]; /* UD Port Check Enable + 0 - Port field in Address Vector is ignored + 1 - HCA will check the port field in AV entry (fetched for UD descriptor) against the Port of the UD QP executing the descriptor. */ + pseudo_bit_t he[0x00001]; /* Host Endianess - Used for Atomic Operations + 0 - Host is Little Endian + 1 - Host is Big endian + */ + pseudo_bit_t reserved4[0x00001]; + pseudo_bit_t ce[0x00001]; /* Checksum Enabled - when Set IPoverIB checksum generation & checking is enabled */ + pseudo_bit_t sph[0x00001]; /* 0 - SW calculates TCP/UDP Pseudo-Header checksum and inserts it into the TCP/UDP checksum field when sending a packet + 1 - HW calculates TCP/UDP Pseudo-Header checksum when sending a packet + */ + pseudo_bit_t rph[0x00001]; /* 0 - Not HW calculation of TCP/UDP Pseudo-Header checksum are done when receiving a packet + 1 - HW calculates TCP/UDP Pseudo-Header checksum when receiving a packet + */ + pseudo_bit_t reserved5[0x00002]; + pseudo_bit_t responder_exu[0x00004];/* Indicate the relation between the execution enegines allocation dedicated for responder versus the engines dedicated for reqvester . + responder_exu/16 = (number of responder exu engines)/(total number of engines) + Legal values are 0x0-0xF. 0 is "auto". 
+ + */ + pseudo_bit_t reserved6[0x00004]; + pseudo_bit_t wqe_quota[0x0000f]; /* Maximum number of WQEs that are executed prior to preemption of execution unit. 0 - reserved. */ + pseudo_bit_t wqe_quota_en[0x00001]; /* If set - wqe_quota field is used. If cleared - WQE quota is set to "auto" value */ +/* -------------- */ + pseudo_bit_t reserved7[0x00040]; +/* -------------- */ + struct arbelprm_qpcbaseaddr_st qpc_eec_cqc_eqc_rdb_parameters; +/* -------------- */ + pseudo_bit_t reserved8[0x00100]; +/* -------------- */ + struct arbelprm_multicastparam_st multicast_parameters; +/* -------------- */ + pseudo_bit_t reserved9[0x00080]; +/* -------------- */ + struct arbelprm_tptparams_st tpt_parameters; +/* -------------- */ + pseudo_bit_t reserved10[0x00080]; +/* -------------- */ + struct arbelprm_uar_params_st uar_parameters;/* UAR Parameters */ +/* -------------- */ + pseudo_bit_t reserved11[0x00600]; +/* -------------- */ +}; + +/* Event Queue Context Table Entry */ + +struct arbelprm_eqc_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00008]; + pseudo_bit_t st[0x00004]; /* Event delivery state machine + 0x9 - Armed + 0xA - Fired + 0xB - Always_Armed (auto-rearm) + other - reserved */ + pseudo_bit_t reserved1[0x00005]; + pseudo_bit_t oi[0x00001]; /* Overrun ignore. + If set, HW will not check EQ full condition when writing new EQEs. */ + pseudo_bit_t tr[0x00001]; /* Translation Required. If set - EQ accesses undergo address translation. */ + pseudo_bit_t reserved2[0x00005]; + pseudo_bit_t owner[0x00004]; /* 0 - SW ownership + 1 - HW ownership + Valid for the QUERY_EQ and HW2SW_EQ commands only */ + pseudo_bit_t status[0x00004]; /* EQ status: + 0000 - OK + 1010 - EQ write failure + Valid for the QUERY_EQ and HW2SW_EQ commands only */ +/* -------------- */ + pseudo_bit_t start_address_h[0x00020];/* Start Address of Event Queue[63:32]. */ +/* -------------- */ + pseudo_bit_t start_address_l[0x00020];/* Start Address of Event Queue[31:0]. + Must be aligned on 32-byte boundary */ +/* -------------- */ + pseudo_bit_t reserved3[0x00018]; + pseudo_bit_t log_eq_size[0x00005]; /* Number of entries in this EQ is 2^log_eq_size. + Log_eq_size must be greater than 1. + Maximum EQ size is 2^17 EQEs (max Log_eq_size is 17). */ + pseudo_bit_t reserved4[0x00003]; +/* -------------- */ + pseudo_bit_t reserved5[0x00020]; +/* -------------- */ + pseudo_bit_t intr[0x00008]; /* Interrupt (message) to be generated to report event to INT layer. + 00iiiiii - set to INTA given in QUERY_ADAPTER in order to generate INTA messages on Express. + 10jjjjjj - specifies type of interrupt message to be generated (total 64 different messages supported). + All other values are reserved and should not be used. + + If interrupt generation is not required, ST field must be set upon creation to Fired state. No EQ arming doorbell should be performed. In this case hardware will not generate any interrupt. */ + pseudo_bit_t reserved6[0x00018]; +/* -------------- */ + pseudo_bit_t pd[0x00018]; /* PD to be used to access EQ */ + pseudo_bit_t reserved7[0x00008]; +/* -------------- */ + pseudo_bit_t lkey[0x00020]; /* Memory key (L-Key) to be used to access EQ */ +/* -------------- */ + pseudo_bit_t reserved8[0x00040]; +/* -------------- */ + pseudo_bit_t consumer_indx[0x00020];/* Contains next entry to be read upon polling the event queue. + Must be initialized to zero while opening EQ */ +/* -------------- */ + pseudo_bit_t producer_indx[0x00020];/* Contains next entry in EQ to be written by the HCA.
+ Must be initalized to zero while opening EQ. */ +/* -------------- */ + pseudo_bit_t reserved9[0x00080]; +/* -------------- */ +}; + +/* Memory Translation Table (MTT) Entry */ + +struct arbelprm_mtt_st { /* Little Endian */ + pseudo_bit_t ptag_h[0x00020]; /* High-order bits of physical tag. The size of the field depends on the page size of the region. Maximum PTAG size is 52 bits. */ +/* -------------- */ + pseudo_bit_t p[0x00001]; /* Present bit. If set, page entry is valid. If cleared, access to this page will generate non-present page access fault. */ + pseudo_bit_t reserved0[0x0000b]; + pseudo_bit_t ptag_l[0x00014]; /* Low-order bits of Physical tag. The size of the field depends on the page size of the region. Maximum PTAG size is 52 bits. */ +/* -------------- */ +}; + +/* Memory Protection Table (MPT) Entry */ + +struct arbelprm_mpt_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00008]; + pseudo_bit_t r_w[0x00001]; /* Defines whether this entry is Region (1) or Window (0) */ + pseudo_bit_t pa[0x00001]; /* Physical address. If set, no virtual-to-physical address translation will be performed for this region */ + pseudo_bit_t lr[0x00001]; /* If set - local read access enabled */ + pseudo_bit_t lw[0x00001]; /* If set - local write access enabled */ + pseudo_bit_t rr[0x00001]; /* If set - remote read access enabled. */ + pseudo_bit_t rw[0x00001]; /* If set - remote write access enabled */ + pseudo_bit_t a[0x00001]; /* If set - remote Atomic access is enabled */ + pseudo_bit_t eb[0x00001]; /* If set - Bind is enabled. Valid for region entry only. */ + pseudo_bit_t reserved1[0x0000c]; + pseudo_bit_t status[0x00004]; /* Region/Window Status + 0xF - not valid (SW ownership) + 0x3 - FREE state + else - HW ownership + Unbound Type I windows are doneted reg_wnd_len field equals zero. + Unbound Type II windows are donated by Status=FREE. */ +/* -------------- */ + pseudo_bit_t page_size[0x00005]; /* Page size used for the region. Actual size is [4K]*2^Page_size bytes. + page_size should be less than 20. */ + pseudo_bit_t reserved2[0x00002]; + pseudo_bit_t type[0x00001]; /* Applicable for windows only, must be zero for regions + 0 - Type one window + 1 - Type two window */ + pseudo_bit_t qpn[0x00018]; /* QP number this MW is attached to. Valid for type2 memory windows and on QUERY_MPT only */ +/* -------------- */ + pseudo_bit_t mem_key[0x00020]; /* The memory Key. The field holds the mem_key field in the following semantics: {key[7:0],key[31:8]}. + */ +/* -------------- */ + pseudo_bit_t pd[0x00018]; /* Protection Domain */ + pseudo_bit_t reserved3[0x00001]; + pseudo_bit_t ei[0x00001]; /* Enable Invalidation - When set, Local/Remote invalidation can be executed on this window/region. + Must be set for type2 windows and non-shared physical memory regions. + Must be clear for regions that are used to access Work Queues, Completion Queues and Event Queues */ + pseudo_bit_t zb[0x00001]; /* When set, this region is Zero Based Region */ + pseudo_bit_t fre[0x00001]; /* When set, Fast Registration Operations can be executed on this region */ + pseudo_bit_t rae[0x00001]; /* When set, remote access can be enabled on this region. + Used when executing Fast Registration Work Request to validate that remote access rights can be granted to this MPT. + If the bit is cleared, Fast Registration Work Request requesting remote access rights will fail. 
+ */ + pseudo_bit_t reserved4[0x00003]; +/* -------------- */ + pseudo_bit_t start_address_h[0x00020];/* Start Address[63:32] - Virtual Address where this region/window starts */ +/* -------------- */ + pseudo_bit_t start_address_l[0x00020];/* Start Address[31:0] - Virtual Address where this region/window starts */ +/* -------------- */ + pseudo_bit_t reg_wnd_len_h[0x00020];/* Region/Window Length[63:32] */ +/* -------------- */ + pseudo_bit_t reg_wnd_len_l[0x00020];/* Region/Window Length[31:0] */ +/* -------------- */ + pseudo_bit_t lkey[0x00020]; /* Must be 0 for SW2HW_MPT. + On QUERY_MPT and HW2SW_MPT commands for Memory Window it reflects the LKey of the Region that the Window is bound to. + The field holds the lkey field in the following semantics: {key[7:0],key[31:8]}. */ +/* -------------- */ + pseudo_bit_t win_cnt[0x00020]; /* Number of windows bound to this region. Valid for regions only. + The field is valid only for the QUERY_MPT and HW2SW_MPT commands. */ +/* -------------- */ + pseudo_bit_t reserved5[0x00020]; +/* -------------- */ + pseudo_bit_t mtt_adr_h[0x00006]; /* Base (first) address of the MTT relative to MTT base in the ICM */ + pseudo_bit_t reserved6[0x0001a]; +/* -------------- */ + pseudo_bit_t reserved7[0x00003]; + pseudo_bit_t mtt_adr_l[0x0001d]; /* Base (first) address of the MTT relative to MTT base address in the ICM. Must be aligned on 8 bytes. */ +/* -------------- */ + pseudo_bit_t mtt_sz[0x00020]; /* Number of MTT entries allocated for this MR. + When Fast Registration Operations can not be executed on this region (FRE bit is zero) this field is reserved. + When Fast Registration Operation is enabled (FRE bit is set) this field indicates the number of MTTs allocated for this MR. If mtt_sz value is zero, there is no limit for the numbers of MTTs and the HCA does not check this field when executing fast register WQE. */ +/* -------------- */ + pseudo_bit_t reserved8[0x00040]; +/* -------------- */ +}; + +/* Completion Queue Context Table Entry */ + +struct arbelprm_completion_queue_context_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00008]; + pseudo_bit_t st[0x00004]; /* Event delivery state machine + 0x0 - reserved + 0x9 - ARMED (Request for Notification) + 0x6 - ARMED SOLICITED (Request Solicited Notification) + 0xA - FIRED + other - reserved + + Must be 0x0 in CQ initialization. + Valid for the QUERY_CQ and HW2SW_CQ commands only. */ + pseudo_bit_t reserved1[0x00005]; + pseudo_bit_t oi[0x00001]; /* When set, overrun ignore is enabled. + When set, Updates of CQ consumer counter (poll for completion) or Request completion notifications (Arm CQ) doorbells should not be rang on that CQ. */ + pseudo_bit_t reserved2[0x0000a]; + pseudo_bit_t status[0x00004]; /* CQ status + 0000 - OK + 1001 - CQ overflow + 1010 - CQ write failure + Valid for the QUERY_CQ and HW2SW_CQ commands only */ +/* -------------- */ + pseudo_bit_t start_address_h[0x00020];/* Start address of CQ[63:32]. + Must be aligned on CQE size (32 bytes) */ +/* -------------- */ + pseudo_bit_t start_address_l[0x00020];/* Start address of CQ[31:0]. + Must be aligned on CQE size (32 bytes) */ +/* -------------- */ + pseudo_bit_t usr_page[0x00018]; /* UAR page this CQ can be accessed through (ringinig CQ doorbells) */ + pseudo_bit_t log_cq_size[0x00005]; /* Log (base 2) of the CQ size (in entries). 
+ Maximum CQ size is 2^17 CQEs (max log_cq_size is 17) */ + pseudo_bit_t reserved3[0x00003]; +/* -------------- */ + pseudo_bit_t reserved4[0x00020]; +/* -------------- */ + pseudo_bit_t c_eqn[0x00008]; /* Event Queue this CQ reports completion events to. + Valid values are 0 to 63 + If configured to a value other than 0-63, completion events will not be reported on the CQ. */ + pseudo_bit_t reserved5[0x00018]; +/* -------------- */ + pseudo_bit_t pd[0x00018]; /* Protection Domain to be used to access CQ. + Must be the same as the PD of the CQ L_Key. */ + pseudo_bit_t reserved6[0x00008]; +/* -------------- */ + pseudo_bit_t l_key[0x00020]; /* Memory key (L_Key) to be used to access CQ */ +/* -------------- */ + pseudo_bit_t last_notified_indx[0x00020];/* Maintained by HW. + Valid for QUERY_CQ and HW2SW_CQ commands only. */ +/* -------------- */ + pseudo_bit_t solicit_producer_indx[0x00020];/* Maintained by HW. + Valid for QUERY_CQ and HW2SW_CQ commands only. + */ +/* -------------- */ + pseudo_bit_t consumer_counter[0x00020];/* Consumer counter is a 32-bit counter that is incremented for each CQE polled from the CQ. + Must be 0x0 in CQ initialization. + Valid for the QUERY_CQ and HW2SW_CQ commands only. */ +/* -------------- */ + pseudo_bit_t producer_counter[0x00020];/* Producer counter is a 32-bit counter that is incremented for each CQE that is written by the HW to the CQ. + CQ overrun is reported if Producer_counter + 1 equals Consumer_counter and a CQE needs to be added. + Maintained by HW (valid for the QUERY_CQ and HW2SW_CQ commands only) */ +/* -------------- */ + pseudo_bit_t cqn[0x00018]; /* CQ number. Least significant bits are constrained by the position of this CQ in the CQC table + Valid for the QUERY_CQ and HW2SW_CQ commands only */ + pseudo_bit_t reserved7[0x00008]; +/* -------------- */ + pseudo_bit_t cq_ci_db_record[0x00020];/* Index in the UAR Context Table Entry. + HW uses this index as an offset from the UAR Context Table Entry in order to read this CQ Consumer Counter doorbell record. + This value can be retrieved from the HW in the QUERY_CQ command. */ +/* -------------- */ + pseudo_bit_t cq_state_db_record[0x00020];/* Index in the UAR Context Table Entry. + HW uses this index as an offset from the UAR Context Table Entry in order to read this CQ state doorbell record. + This value can be retrieved from the HW in the QUERY_CQ command. */ +/* -------------- */ + pseudo_bit_t reserved8[0x00020]; +/* -------------- */ +}; + +/* GPIO_event_data */ + +struct arbelprm_gpio_event_data_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00060]; +/* -------------- */ + pseudo_bit_t gpio_event_hi[0x00020];/* If any bit is set to 1, then a rising/falling event has occurred on the corresponding GPIO pin. */ +/* -------------- */ + pseudo_bit_t gpio_event_lo[0x00020];/* If any bit is set to 1, then a rising/falling event has occurred on the corresponding GPIO pin.
*/ +/* -------------- */ + pseudo_bit_t reserved1[0x00020]; +/* -------------- */ +}; + +/* Event_data Field - QP/EE Events */ + +struct arbelprm_qp_ee_event_st { /* Little Endian */ + pseudo_bit_t qpn_een[0x00018]; /* QP/EE/SRQ number event is reported for */ + pseudo_bit_t reserved0[0x00008]; +/* -------------- */ + pseudo_bit_t reserved1[0x00020]; +/* -------------- */ + pseudo_bit_t reserved2[0x0001c]; + pseudo_bit_t e_q[0x00001]; /* If set - EEN if cleared - QP in the QPN/EEN field + Not valid on SRQ events */ + pseudo_bit_t reserved3[0x00003]; +/* -------------- */ + pseudo_bit_t reserved4[0x00060]; +/* -------------- */ +}; + +/* InfiniHost-III-EX Type0 Configuration Header */ + +struct arbelprm_mt25208_type0_st { /* Little Endian */ + pseudo_bit_t vendor_id[0x00010]; /* Hardwired to 0x15B3 */ + pseudo_bit_t device_id[0x00010]; /* 25208 (decimal) - InfiniHost-III compatible mode + 25218 (decimal) - InfiniHost-III EX mode (the mode described in this manual) + 25209 (decimal) - Flash burner mode - see Flash burning application note for further details on this mode + */ +/* -------------- */ + pseudo_bit_t command[0x00010]; /* PCI Command Register */ + pseudo_bit_t status[0x00010]; /* PCI Status Register */ +/* -------------- */ + pseudo_bit_t revision_id[0x00008]; + pseudo_bit_t class_code_hca_class_code[0x00018]; +/* -------------- */ + pseudo_bit_t cache_line_size[0x00008];/* Cache Line Size */ + pseudo_bit_t latency_timer[0x00008]; + pseudo_bit_t header_type[0x00008]; /* hardwired to zero */ + pseudo_bit_t bist[0x00008]; +/* -------------- */ + pseudo_bit_t bar0_ctrl[0x00004]; /* hard-wired to 0100 */ + pseudo_bit_t reserved0[0x00010]; + pseudo_bit_t bar0_l[0x0000c]; /* Lower bits of BAR0 (Device Configuration Space) */ +/* -------------- */ + pseudo_bit_t bar0_h[0x00020]; /* Upper 32 bits of BAR0 (Device Configuration Space) */ +/* -------------- */ + pseudo_bit_t bar1_ctrl[0x00004]; /* Hardwired to 1100 */ + pseudo_bit_t reserved1[0x00010]; + pseudo_bit_t bar1_l[0x0000c]; /* Lower bits of BAR1 (User Access Region - UAR - space) */ +/* -------------- */ + pseudo_bit_t bar1_h[0x00020]; /* upper 32 bits of BAR1 (User Access Region - UAR - space) */ +/* -------------- */ + pseudo_bit_t bar2_ctrl[0x00004]; /* Hardwired to 1100 */ + pseudo_bit_t reserved2[0x00010]; + pseudo_bit_t bar2_l[0x0000c]; /* Lower bits of BAR2 - Local Attached Memory if present and enabled. Else zeroed. */ +/* -------------- */ + pseudo_bit_t bar2_h[0x00020]; /* Upper 32 bits of BAR2 - Local Attached Memory if present and enabled. Else zeroed. */ +/* -------------- */ + pseudo_bit_t cardbus_cis_pointer[0x00020]; +/* -------------- */ + pseudo_bit_t subsystem_vendor_id[0x00010];/* Specified by the device NVMEM configuration */ + pseudo_bit_t subsystem_id[0x00010]; /* Specified by the device NVMEM configuration */ +/* -------------- */ + pseudo_bit_t expansion_rom_enable[0x00001];/* Expansion ROM Enable. Hardwired to 0 if expansion ROM is disabled in the device NVMEM configuration. */ + pseudo_bit_t reserved3[0x0000a]; + pseudo_bit_t expansion_rom_base_address[0x00015];/* Expansion ROM Base Address (upper 21 bit). Hardwired to 0 if expansion ROM is disabled in the device NVMEM configuration. 
*/ +/* -------------- */ + pseudo_bit_t capabilities_pointer[0x00008];/* Specified by the device NVMEM configuration */ + pseudo_bit_t reserved4[0x00018]; +/* -------------- */ + pseudo_bit_t reserved5[0x00020]; +/* -------------- */ + pseudo_bit_t interrupt_line[0x00008]; + pseudo_bit_t interrupt_pin[0x00008]; + pseudo_bit_t min_gnt[0x00008]; + pseudo_bit_t max_latency[0x00008]; +/* -------------- */ + pseudo_bit_t reserved6[0x00100]; +/* -------------- */ + pseudo_bit_t msi_cap_id[0x00008]; + pseudo_bit_t msi_next_cap_ptr[0x00008]; + pseudo_bit_t msi_en[0x00001]; + pseudo_bit_t multiple_msg_cap[0x00003]; + pseudo_bit_t multiple_msg_en[0x00003]; + pseudo_bit_t cap_64_bit_addr[0x00001]; + pseudo_bit_t reserved7[0x00008]; +/* -------------- */ + pseudo_bit_t msg_addr_l[0x00020]; +/* -------------- */ + pseudo_bit_t msg_addr_h[0x00020]; +/* -------------- */ + pseudo_bit_t msg_data[0x00010]; + pseudo_bit_t reserved8[0x00010]; +/* -------------- */ + pseudo_bit_t reserved9[0x00080]; +/* -------------- */ + pseudo_bit_t pm_cap_id[0x00008]; /* Power management capability ID - 01h */ + pseudo_bit_t pm_next_cap_ptr[0x00008]; + pseudo_bit_t pm_cap[0x00010]; /* [2:0] Version - 02h + [3] PME clock - 0h + [4] RsvP + [5] Device specific initialization - 0h + [8:6] AUX current - 0h + [9] D1 support - 0h + [10] D2 support - 0h + [15:11] PME support - 0h */ +/* -------------- */ + pseudo_bit_t pm_status_control[0x00010];/* [14:13] - Data scale - 0h */ + pseudo_bit_t pm_control_status_brdg_ext[0x00008]; + pseudo_bit_t data[0x00008]; +/* -------------- */ + pseudo_bit_t reserved10[0x00040]; +/* -------------- */ + pseudo_bit_t vpd_cap_id[0x00008]; /* 03h */ + pseudo_bit_t vpd_next_cap_id[0x00008]; + pseudo_bit_t vpd_address[0x0000f]; + pseudo_bit_t f[0x00001]; +/* -------------- */ + pseudo_bit_t vpd_data[0x00020]; +/* -------------- */ + pseudo_bit_t reserved11[0x00040]; +/* -------------- */ + pseudo_bit_t pciex_cap_id[0x00008]; /* PCI-Express capability ID - 10h */ + pseudo_bit_t pciex_next_cap_ptr[0x00008]; + pseudo_bit_t pciex_cap[0x00010]; /* [3:0] Capability version - 1h + [7:4] Device/Port Type - 0h + [8] Slot implemented - 0h + [13:9] Interrupt message number + */ +/* -------------- */ + pseudo_bit_t device_cap[0x00020]; /* [2:0] Max_Payload_Size supported - 2h + [4:3] Phantom Function supported - 0h + [5] Extended Tag Filed supported - 0h + [8:6] Endpoint L0s Acceptable Latency - TBD + [11:9] Endpoint L1 Acceptable Latency - TBD + [12] Attention Button Present - configured through InfiniBurn + [13] Attention Indicator Present - configured through InfiniBurn + [14] Power Indicator Present - configured through InfiniBurn + [25:18] Captured Slot Power Limit Value + [27:26] Captured Slot Power Limit Scale */ +/* -------------- */ + pseudo_bit_t device_control[0x00010]; + pseudo_bit_t device_status[0x00010]; +/* -------------- */ + pseudo_bit_t link_cap[0x00020]; /* [3:0] Maximum Link Speed - 1h + [9:4] Maximum Link Width - 8h + [11:10] Active State Power Management Support - 3h + [14:12] L0s Exit Latency - TBD + [17:15] L1 Exit Latency - TBD + [31:24] Port Number - 0h */ +/* -------------- */ + pseudo_bit_t link_control[0x00010]; + pseudo_bit_t link_status[0x00010]; /* [3:0] Link Speed - 1h + [9:4] Negotiated Link Width + [12] Slot clock configuration - 1h */ +/* -------------- */ + pseudo_bit_t reserved12[0x00260]; +/* -------------- */ + pseudo_bit_t advanced_error_reporting_cap_id[0x00010];/* 0001h. 
*/ + pseudo_bit_t capability_version[0x00004];/* 1h */ + pseudo_bit_t next_capability_offset[0x0000c];/* 0h */ +/* -------------- */ + pseudo_bit_t uncorrectable_error_status_register[0x00020];/* 0 Training Error Status + 4 Data Link Protocol Error Status + 12 Poisoned TLP Status + 13 Flow Control Protocol Error Status + 14 Completion Timeout Status + 15 Completer Abort Status + 16 Unexpected Completion Status + 17 Receiver Overflow Status + 18 Malformed TLP Status + 19 ECRC Error Status + 20 Unsupported Request Error Status */ +/* -------------- */ + pseudo_bit_t uncorrectable_error_mask_register[0x00020];/* 0 Training Error Mask + 4 Data Link Protocol Error Mask + 12 Poisoned TLP Mask + 13 Flow Control Protocol Error Mask + 14 Completion Timeout Mask + 15 Completer Abort Mask + 16 Unexpected Completion Mask + 17 Receiver Overflow Mask + 18 Malformed TLP Mask + 19 ECRC Error Mask + 20 Unsupported Request Error Mask */ +/* -------------- */ + pseudo_bit_t uncorrectable_severity_mask_register[0x00020];/* 0 Training Error Severity + 4 Data Link Protocol Error Severity + 12 Poisoned TLP Severity + 13 Flow Control Protocol Error Severity + 14 Completion Timeout Severity + 15 Completer Abort Severity + 16 Unexpected Completion Severity + 17 Receiver Overflow Severity + 18 Malformed TLP Severity + 19 ECRC Error Severity + 20 Unsupported Request Error Severity */ +/* -------------- */ + pseudo_bit_t correctable_error_status_register[0x00020];/* 0 Receiver Error Status + 6 Bad TLP Status + 7 Bad DLLP Status + 8 REPLAY_NUM Rollover Status + 12 Replay Timer Timeout Status */ +/* -------------- */ + pseudo_bit_t correctable_error_mask_register[0x00020];/* 0 Receiver Error Mask + 6 Bad TLP Mask + 7 Bad DLLP Mask + 8 REPLAY_NUM Rollover Mask + 12 Replay Timer Timeout Mask */ +/* -------------- */ + pseudo_bit_t advance_error_capabilities_and_control_register[0x00020]; +/* -------------- */ + struct arbelprm_header_log_register_st header_log_register; +/* -------------- */ + pseudo_bit_t reserved13[0x006a0]; +/* -------------- */ +}; + +/* Event Data Field - Performance Monitor */ + +struct arbelprm_performance_monitor_event_st { /* Little Endian */ + struct arbelprm_performance_monitors_st performance_monitor_snapshot;/* Performance monitor snapshot */ +/* -------------- */ + pseudo_bit_t monitor_number[0x00008];/* 0x01 - SQPC + 0x02 - RQPC + 0x03 - CQC + 0x04 - Rkey + 0x05 - TLB + 0x06 - port0 + 0x07 - port1 */ + pseudo_bit_t reserved0[0x00018]; +/* -------------- */ + pseudo_bit_t reserved1[0x00040]; +/* -------------- */ +}; + +/* Event_data Field - Page Faults */ + +struct arbelprm_page_fault_event_data_st { /* Little Endian */ + pseudo_bit_t va_h[0x00020]; /* Virtual Address[63:32] this page fault is reported on */ +/* -------------- */ + pseudo_bit_t va_l[0x00020]; /* Virtual Address[63:32] this page fault is reported on */ +/* -------------- */ + pseudo_bit_t mem_key[0x00020]; /* Memory Key this page fault is reported on */ +/* -------------- */ + pseudo_bit_t qp[0x00018]; /* QP this page fault is reported on */ + pseudo_bit_t reserved0[0x00003]; + pseudo_bit_t a[0x00001]; /* If set the memory access that caused the page fault was atomic */ + pseudo_bit_t lw[0x00001]; /* If set the memory access that caused the page fault was local write */ + pseudo_bit_t lr[0x00001]; /* If set the memory access that caused the page fault was local read */ + pseudo_bit_t rw[0x00001]; /* If set the memory access that caused the page fault was remote write */ + pseudo_bit_t rr[0x00001]; /* If set the memory access 
that caused the page fault was remote read */ +/* -------------- */ + pseudo_bit_t pd[0x00018]; /* PD this page fault is reported on */ + pseudo_bit_t reserved1[0x00008]; +/* -------------- */ + pseudo_bit_t prefetch_len[0x00020]; /* Indicates how many subsequent pages in the same memory region/window will be accessed by the following transaction after this page fault is resolved. Measured in bytes. SW can use this information in order to page-in the subsequent pages if they are not present. */ +/* -------------- */ +}; + +/* WQE segments format */ + +struct arbelprm_wqe_segment_st { /* Little Endian */ + struct arbelprm_send_wqe_segment_st send_wqe_segment;/* Send WQE segment format */ +/* -------------- */ + pseudo_bit_t reserved0[0x00280]; +/* -------------- */ + struct arbelprm_wqe_segment_ctrl_mlx_st mlx_wqe_segment_ctrl;/* MLX WQE segment format */ +/* -------------- */ + pseudo_bit_t reserved1[0x00100]; +/* -------------- */ + struct arbelprm_wqe_segment_ctrl_recv_st recv_wqe_segment_ctrl;/* Receive segment format */ +/* -------------- */ + pseudo_bit_t reserved2[0x00080]; +/* -------------- */ +}; + +/* Event_data Field - Port State Change */ + +struct arbelprm_port_state_change_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00040]; +/* -------------- */ + pseudo_bit_t reserved1[0x0001c]; + pseudo_bit_t p[0x00002]; /* Port number (1 or 2) */ + pseudo_bit_t reserved2[0x00002]; +/* -------------- */ + pseudo_bit_t reserved3[0x00060]; +/* -------------- */ +}; + +/* Event_data Field - Completion Queue Error */ + +struct arbelprm_completion_queue_error_st { /* Little Endian */ + pseudo_bit_t cqn[0x00018]; /* CQ number event is reported for */ + pseudo_bit_t reserved0[0x00008]; +/* -------------- */ + pseudo_bit_t reserved1[0x00020]; +/* -------------- */ + pseudo_bit_t syndrome[0x00008]; /* Error syndrome + 0x01 - CQ overrun + 0x02 - CQ access violation error */ + pseudo_bit_t reserved2[0x00018]; +/* -------------- */ + pseudo_bit_t reserved3[0x00060]; +/* -------------- */ +}; + +/* Event_data Field - Completion Event */ + +struct arbelprm_completion_event_st { /* Little Endian */ + pseudo_bit_t cqn[0x00018]; /* CQ number event is reported for */ + pseudo_bit_t reserved0[0x00008]; +/* -------------- */ + pseudo_bit_t reserved1[0x000a0]; +/* -------------- */ +}; + +/* Event Queue Entry */ + +struct arbelprm_event_queue_entry_st { /* Little Endian */ + pseudo_bit_t event_sub_type[0x00008];/* Event Sub Type. + Defined for events which have sub types, zero elsewhere. */ + pseudo_bit_t reserved0[0x00008]; + pseudo_bit_t event_type[0x00008]; /* Event Type */ + pseudo_bit_t reserved1[0x00008]; +/* -------------- */ + pseudo_bit_t event_data[6][0x00020];/* Delivers auxiliary data to handle event. */ +/* -------------- */ + pseudo_bit_t reserved2[0x00007]; + pseudo_bit_t owner[0x00001]; /* Owner of the entry + 0 SW + 1 HW */ + pseudo_bit_t reserved3[0x00018]; +/* -------------- */ +}; + +/* QP/EE State Transitions Command Parameters */ + +struct arbelprm_qp_ee_state_transitions_st { /* Little Endian */ + pseudo_bit_t opt_param_mask[0x00020];/* This field defines which optional parameters are passed. Each bit specifies whether optional parameter is passed (set) or not (cleared). The optparammask is defined for each QP/EE command. 
*/ +/* -------------- */ + pseudo_bit_t reserved0[0x00020]; +/* -------------- */ + struct arbelprm_queue_pair_ee_context_entry_st qpc_eec_data;/* QPC/EEC data */ +/* -------------- */ + pseudo_bit_t reserved1[0x009c0]; +/* -------------- */ +}; + +/* Completion Queue Entry Format */ + +struct arbelprm_completion_queue_entry_st { /* Little Endian */ + pseudo_bit_t my_qpn[0x00018]; /* Indicates the QP for which completion is being reported */ + pseudo_bit_t reserved0[0x00004]; + pseudo_bit_t ver[0x00004]; /* CQE version. + 0 for InfiniHost-III-EX */ +/* -------------- */ + pseudo_bit_t my_ee[0x00018]; /* EE context (for RD only). + Invalid for Bind and Nop operation on RD. + For non RD services this field reports the CQE timestamp. The Timestamp is a free running counter that is incremented every TimeStampGranularity tick. The counter rolls-over when it reaches saturation. TimeStampGranularity is configured in the INIT_HCA command. This feature is currently not supported. + */ + pseudo_bit_t checksum_15_8[0x00008];/* Checksum[15:8] - See IPoverIB checksum offloading chapter */ +/* -------------- */ + pseudo_bit_t rqpn[0x00018]; /* Remote (source) QP number. Valid in Responder CQE only for Datagram QP. */ + pseudo_bit_t checksum_7_0[0x00008]; /* Checksum[7:0] - See IPoverIB checksum offloading chapter */ +/* -------------- */ + pseudo_bit_t rlid[0x00010]; /* Remote (source) LID of the message. Valid in Responder of UD QP CQE only. */ + pseudo_bit_t ml_path[0x00007]; /* My (destination) LID path bits - these are the lowermost LMC bits of the DLID in an incoming UD packet, higher bits of this field, that are not part of the LMC bits are zeroed by HW. + Valid in responder of UD QP CQE only. + Invalid if incoming message DLID is the permissive LID or incoming message is multicast. */ + pseudo_bit_t g[0x00001]; /* GRH present indicator. Valid in Responder of UD QP CQE only. */ + pseudo_bit_t ipok[0x00001]; /* IP OK - See IPoverIB checksum offloading chapter */ + pseudo_bit_t reserved1[0x00003]; + pseudo_bit_t sl[0x00004]; /* Service Level of the message. Valid in Responder of UD QP CQE only. */ +/* -------------- */ + pseudo_bit_t immediate_ethertype_pkey_indx_eecredits[0x00020];/* Valid for receive queue completion only. + If Opcode field indicates that this was send/write with immediate, this field contains immediate field of the packet. + If completion corresponds to RAW receive queue, bits 15:0 contain Ethertype field of the packet. + If completion corresponds to GSI receive queue, bits 31:16 contain index in PKey table that matches PKey of the message arrived. + If Opcode field indicates that this was send and invalidate, this field contains the key that was invalidated. + For CQE of send queue of the reliable connection service (but send and invalidate), bits [4:0] of this field contain the encoded EEcredits received in last ACK of the message. */ +/* -------------- */ + pseudo_bit_t byte_cnt[0x00020]; /* Byte count of data actually transferred (valid for receive queue completions only) */ +/* -------------- */ + pseudo_bit_t reserved2[0x00006]; + pseudo_bit_t wqe_adr[0x0001a]; /* Bits 31:6 of WQE virtual address completion is reported for. The 6 least significant bits are zero. */ +/* -------------- */ + pseudo_bit_t reserved3[0x00007]; + pseudo_bit_t owner[0x00001]; /* Owner field. Zero value of this field means SW ownership of CQE. */ + pseudo_bit_t reserved4[0x0000f]; + pseudo_bit_t s[0x00001]; /* If set, completion is reported for Send queue, if cleared - receive queue. 
*/ + pseudo_bit_t opcode[0x00008]; /* The opcode of WQE completion is reported for. + For CQEs corresponding to send completion, NOPCODE field of the WQE is copied to this field. + For CQEs corresponding to receive completions, opcode field of last packet in the message copied to this field. + For CQEs corresponding to the receive queue of QPs mapped to QP1, the opcode will be SEND with Immediate (messages are guaranteed to be SEND only) + + The following values are reported in case of completion with error: + 0xFE - For completion with error on Receive Queues + 0xFF - For completion with error on Send Queues */ +/* -------------- */ +}; + +/* */ + +struct arbelprm_ecc_detect_event_data_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00080]; +/* -------------- */ + pseudo_bit_t cause_lsb[0x00001]; + pseudo_bit_t reserved1[0x00002]; + pseudo_bit_t cause_msb[0x00001]; + pseudo_bit_t reserved2[0x00002]; + pseudo_bit_t err_rmw[0x00001]; + pseudo_bit_t err_src_id[0x00003]; + pseudo_bit_t err_da[0x00002]; + pseudo_bit_t err_ba[0x00002]; + pseudo_bit_t reserved3[0x00011]; + pseudo_bit_t overflow[0x00001]; +/* -------------- */ + pseudo_bit_t err_ra[0x00010]; + pseudo_bit_t err_ca[0x00010]; +/* -------------- */ +}; + +/* Event_data Field - ECC Detection Event */ + +struct arbelprm_scrubbing_event_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00080]; +/* -------------- */ + pseudo_bit_t cause_lsb[0x00001]; /* data integrity error cause: + single ECC error in the 64bit lsb data, on the rise edge of the clock */ + pseudo_bit_t reserved1[0x00002]; + pseudo_bit_t cause_msb[0x00001]; /* data integrity error cause: + single ECC error in the 64bit msb data, on the fall edge of the clock */ + pseudo_bit_t reserved2[0x00002]; + pseudo_bit_t err_rmw[0x00001]; /* transaction type: + 0 - read + 1 - read/modify/write */ + pseudo_bit_t err_src_id[0x00003]; /* source of the transaction: 0x4 - PCI, other - internal or IB */ + pseudo_bit_t err_da[0x00002]; /* Error DIMM address */ + pseudo_bit_t err_ba[0x00002]; /* Error bank address */ + pseudo_bit_t reserved3[0x00011]; + pseudo_bit_t overflow[0x00001]; /* Fatal: ECC error FIFO overflow - ECC errors were detected, which may or may not have been corrected by InfiniHost-III-EX */ +/* -------------- */ + pseudo_bit_t err_ra[0x00010]; /* Error row address */ + pseudo_bit_t err_ca[0x00010]; /* Error column address */ +/* -------------- */ +}; + +/* Miscellaneous Counters */ + +struct arbelprm_misc_counters_st { /* Little Endian */ + pseudo_bit_t ddr_scan_cnt[0x00020]; /* Number of times whole of LAM was scanned */ +/* -------------- */ + pseudo_bit_t reserved0[0x007e0]; +/* -------------- */ +}; + +/* LAM_EN Output Parameter */ + +struct arbelprm_lam_en_out_param_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00040]; +/* -------------- */ +}; + +/* Extended_Completion_Queue_Entry */ + +struct arbelprm_extended_completion_queue_entry_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00020]; +/* -------------- */ +}; + +/* */ + +struct arbelprm_eq_cmd_doorbell_st { /* Little Endian */ + pseudo_bit_t reserved0[0x00020]; +/* -------------- */ +}; + +/* 0 */ + +struct arbelprm_arbel_prm_st { /* Little Endian */ + struct arbelprm_completion_queue_entry_st completion_queue_entry;/* Completion Queue Entry Format */ +/* -------------- */ + pseudo_bit_t reserved0[0x7ff00]; +/* -------------- */ + struct arbelprm_qp_ee_state_transitions_st qp_ee_state_transitions;/* QP/EE State Transitions Command Parameters */ +/* -------------- */ + pseudo_bit_t 
reserved1[0x7f000]; +/* -------------- */ + struct arbelprm_event_queue_entry_st event_queue_entry;/* Event Queue Entry */ +/* -------------- */ + pseudo_bit_t reserved2[0x7ff00]; +/* -------------- */ + struct arbelprm_completion_event_st completion_event;/* Event_data Field - Completion Event */ +/* -------------- */ + pseudo_bit_t reserved3[0x7ff40]; +/* -------------- */ + struct arbelprm_completion_queue_error_st completion_queue_error;/* Event_data Field - Completion Queue Error */ +/* -------------- */ + pseudo_bit_t reserved4[0x7ff40]; +/* -------------- */ + struct arbelprm_port_state_change_st port_state_change;/* Event_data Field - Port State Change */ +/* -------------- */ + pseudo_bit_t reserved5[0x7ff40]; +/* -------------- */ + struct arbelprm_wqe_segment_st wqe_segment;/* WQE segments format */ +/* -------------- */ + pseudo_bit_t reserved6[0x7f000]; +/* -------------- */ + struct arbelprm_page_fault_event_data_st page_fault_event_data;/* Event_data Field - Page Faults */ +/* -------------- */ + pseudo_bit_t reserved7[0x7ff40]; +/* -------------- */ + struct arbelprm_performance_monitor_event_st performance_monitor_event;/* Event Data Field - Performance Monitor */ +/* -------------- */ + pseudo_bit_t reserved8[0xfff20]; +/* -------------- */ + struct arbelprm_mt25208_type0_st mt25208_type0;/* InfiniHost-III-EX Type0 Configuration Header */ +/* -------------- */ + pseudo_bit_t reserved9[0x7f000]; +/* -------------- */ + struct arbelprm_qp_ee_event_st qp_ee_event;/* Event_data Field - QP/EE Events */ +/* -------------- */ + pseudo_bit_t reserved10[0x00040]; +/* -------------- */ + struct arbelprm_gpio_event_data_st gpio_event_data; +/* -------------- */ + pseudo_bit_t reserved11[0x7fe40]; +/* -------------- */ + struct arbelprm_ud_address_vector_st ud_address_vector;/* UD Address Vector */ +/* -------------- */ + pseudo_bit_t reserved12[0x7ff00]; +/* -------------- */ + struct arbelprm_queue_pair_ee_context_entry_st queue_pair_ee_context_entry;/* QP and EE Context Entry */ +/* -------------- */ + pseudo_bit_t reserved13[0x7fa00]; +/* -------------- */ + struct arbelprm_address_path_st address_path;/* Address Path */ +/* -------------- */ + pseudo_bit_t reserved14[0x7ff00]; +/* -------------- */ + struct arbelprm_completion_queue_context_st completion_queue_context;/* Completion Queue Context Table Entry */ +/* -------------- */ + pseudo_bit_t reserved15[0x7fe00]; +/* -------------- */ + struct arbelprm_mpt_st mpt; /* Memory Protection Table (MPT) Entry */ +/* -------------- */ + pseudo_bit_t reserved16[0x7fe00]; +/* -------------- */ + struct arbelprm_mtt_st mtt; /* Memory Translation Table (MTT) Entry */ +/* -------------- */ + pseudo_bit_t reserved17[0x7ffc0]; +/* -------------- */ + struct arbelprm_eqc_st eqc; /* Event Queue Context Table Entry */ +/* -------------- */ + pseudo_bit_t reserved18[0x7fe00]; +/* -------------- */ + struct arbelprm_performance_monitors_st performance_monitors;/* Performance Monitors */ +/* -------------- */ + pseudo_bit_t reserved19[0x7ff80]; +/* -------------- */ + struct arbelprm_hca_command_register_st hca_command_register;/* HCA Command Register (HCR) */ +/* -------------- */ + pseudo_bit_t reserved20[0xfff20]; +/* -------------- */ + struct arbelprm_init_hca_st init_hca;/* INIT_HCA & QUERY_HCA Parameters Block */ +/* -------------- */ + pseudo_bit_t reserved21[0x7f000]; +/* -------------- */ + struct arbelprm_qpcbaseaddr_st qpcbaseaddr;/* QPC/EEC/CQC/EQC/RDB Parameters */ +/* -------------- */ + pseudo_bit_t reserved22[0x7fc00]; +/* 
-------------- */ + struct arbelprm_udavtable_memory_parameters_st udavtable_memory_parameters;/* Memory Access Parameters for UD Address Vector Table */ +/* -------------- */ + pseudo_bit_t reserved23[0x7ffc0]; +/* -------------- */ + struct arbelprm_multicastparam_st multicastparam;/* Multicast Support Parameters */ +/* -------------- */ + pseudo_bit_t reserved24[0x7ff00]; +/* -------------- */ + struct arbelprm_tptparams_st tptparams;/* Translation and Protection Tables Parameters */ +/* -------------- */ + pseudo_bit_t reserved25[0x7ff00]; +/* -------------- */ + struct arbelprm_enable_lam_st enable_lam;/* ENABLE_LAM Parameters Block */ +/* -------------- */ + struct arbelprm_access_lam_st access_lam; +/* -------------- */ + pseudo_bit_t reserved26[0x7f700]; +/* -------------- */ + struct arbelprm_dimminfo_st dimminfo;/* Logical DIMM Information */ +/* -------------- */ + pseudo_bit_t reserved27[0x7ff00]; +/* -------------- */ + struct arbelprm_query_fw_st query_fw;/* QUERY_FW Parameters Block */ +/* -------------- */ + pseudo_bit_t reserved28[0x7f800]; +/* -------------- */ + struct arbelprm_query_adapter_st query_adapter;/* QUERY_ADAPTER Parameters Block */ +/* -------------- */ + pseudo_bit_t reserved29[0x7f800]; +/* -------------- */ + struct arbelprm_query_dev_lim_st query_dev_lim;/* Query Device Limitations */ +/* -------------- */ + pseudo_bit_t reserved30[0x7f800]; +/* -------------- */ + struct arbelprm_uar_params_st uar_params;/* UAR Parameters */ +/* -------------- */ + pseudo_bit_t reserved31[0x7ff00]; +/* -------------- */ + struct arbelprm_init_ib_st init_ib; /* INIT_IB Parameters */ +/* -------------- */ + pseudo_bit_t reserved32[0x7f800]; +/* -------------- */ + struct arbelprm_mgm_entry_st mgm_entry;/* Multicast Group Member */ +/* -------------- */ + pseudo_bit_t reserved33[0x7fe00]; +/* -------------- */ + struct arbelprm_set_ib_st set_ib; /* SET_IB Parameters */ +/* -------------- */ + pseudo_bit_t reserved34[0x7fe00]; +/* -------------- */ + struct arbelprm_rd_send_doorbell_st rd_send_doorbell;/* RD-send doorbell */ +/* -------------- */ + pseudo_bit_t reserved35[0x7ff80]; +/* -------------- */ + struct arbelprm_send_doorbell_st send_doorbell;/* Send doorbell */ +/* -------------- */ + pseudo_bit_t reserved36[0x7ffc0]; +/* -------------- */ + struct arbelprm_receive_doorbell_st receive_doorbell;/* Receive doorbell */ +/* -------------- */ + pseudo_bit_t reserved37[0x7ffc0]; +/* -------------- */ + struct arbelprm_cq_cmd_doorbell_st cq_cmd_doorbell;/* CQ Doorbell */ +/* -------------- */ + pseudo_bit_t reserved38[0xfffc0]; +/* -------------- */ + struct arbelprm_uar_st uar; /* User Access Region */ +/* -------------- */ + pseudo_bit_t reserved39[0x7c000]; +/* -------------- */ + struct arbelprm_mgmqp_st mgmqp; /* Multicast Group Member QP */ +/* -------------- */ + pseudo_bit_t reserved40[0x7ffe0]; +/* -------------- */ + struct arbelprm_query_debug_msg_st query_debug_msg;/* Query Debug Message */ +/* -------------- */ + pseudo_bit_t reserved41[0x7f800]; +/* -------------- */ + struct arbelprm_mad_ifc_st mad_ifc; /* MAD_IFC Input Mailbox */ +/* -------------- */ + pseudo_bit_t reserved42[0x00900]; +/* -------------- */ + struct arbelprm_mad_ifc_input_modifier_st mad_ifc_input_modifier;/* MAD_IFC Input Modifier */ +/* -------------- */ + pseudo_bit_t reserved43[0x7e6e0]; +/* -------------- */ + struct arbelprm_resize_cq_st resize_cq;/* Resize CQ Input Mailbox */ +/* -------------- */ + pseudo_bit_t reserved44[0x7fe00]; +/* -------------- */ + struct 
arbelprm_completion_with_error_st completion_with_error;/* Completion with Error CQE */ +/* -------------- */ + pseudo_bit_t reserved45[0x7ff00]; +/* -------------- */ + struct arbelprm_hcr_completion_event_st hcr_completion_event;/* Event_data Field - HCR Completion Event */ +/* -------------- */ + pseudo_bit_t reserved46[0x7ff40]; +/* -------------- */ + struct arbelprm_transport_and_ci_error_counters_st transport_and_ci_error_counters;/* Transport and CI Error Counters */ +/* -------------- */ + pseudo_bit_t reserved47[0x7f000]; +/* -------------- */ + struct arbelprm_performance_counters_st performance_counters;/* Performance Counters */ +/* -------------- */ + pseudo_bit_t reserved48[0x9ff800]; +/* -------------- */ + struct arbelprm_fast_registration_segment_st fast_registration_segment;/* Fast Registration Segment */ +/* -------------- */ + pseudo_bit_t reserved49[0x7ff00]; +/* -------------- */ + struct arbelprm_pbl_st pbl; /* Physical Buffer List */ +/* -------------- */ + pseudo_bit_t reserved50[0x7ff00]; +/* -------------- */ + struct arbelprm_srq_context_st srq_context;/* SRQ Context */ +/* -------------- */ + pseudo_bit_t reserved51[0x7fe80]; +/* -------------- */ + struct arbelprm_mod_stat_cfg_st mod_stat_cfg;/* MOD_STAT_CFG */ +/* -------------- */ + pseudo_bit_t reserved52[0x7f800]; +/* -------------- */ + struct arbelprm_virtual_physical_mapping_st virtual_physical_mapping;/* Virtual and Physical Mapping */ +/* -------------- */ + pseudo_bit_t reserved53[0x7ff80]; +/* -------------- */ + struct arbelprm_cq_ci_db_record_st cq_ci_db_record;/* CQ_CI_DB_Record */ +/* -------------- */ + pseudo_bit_t reserved54[0x7ffc0]; +/* -------------- */ + struct arbelprm_cq_arm_db_record_st cq_arm_db_record;/* CQ_ARM_DB_Record */ +/* -------------- */ + pseudo_bit_t reserved55[0x7ffc0]; +/* -------------- */ + struct arbelprm_qp_db_record_st qp_db_record;/* QP_DB_Record */ +/* -------------- */ + pseudo_bit_t reserved56[0x1fffc0]; +/* -------------- */ + struct arbelprm_configuration_registers_st configuration_registers;/* InfiniHost III EX Configuration Registers */ +/* -------------- */ + struct arbelprm_eq_set_ci_table_st eq_set_ci_table;/* EQ Set CI DBs Table */ +/* -------------- */ + pseudo_bit_t reserved57[0x01000]; +/* -------------- */ + struct arbelprm_eq_arm_db_region_st eq_arm_db_region;/* EQ Arm Doorbell Region */ +/* -------------- */ + pseudo_bit_t reserved58[0x00fc0]; +/* -------------- */ + struct arbelprm_clr_int_st clr_int; /* Clear Interrupt Register */ +/* -------------- */ + pseudo_bit_t reserved59[0xffcfc0]; +/* -------------- */ +}; +#endif /* H_prefix_arbelprm_bits_fixnames_MT25218_PRM_csp_H */ diff --git a/src/drivers/infiniband/arbel.c b/src/drivers/infiniband/arbel.c new file mode 100644 index 00000000..51246d7e --- /dev/null +++ b/src/drivers/infiniband/arbel.c @@ -0,0 +1,2130 @@ +/* + * Copyright (C) 2007 Michael Brown . + * + * Based in part upon the original driver by Mellanox Technologies + * Ltd. Portions may be Copyright (c) Mellanox Technologies Ltd. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "arbel.h" + +/** + * @file + * + * Mellanox Arbel Infiniband HCA + * + */ + +/* Port to use */ +#define PXE_IB_PORT 1 + +/*************************************************************************** + * + * Queue number allocation + * + *************************************************************************** + */ + +/** + * Allocate queue number + * + * @v q_inuse Queue usage bitmask + * @v max_inuse Maximum number of in-use queues + * @ret qn_offset Free queue number offset, or negative error + */ +static int arbel_alloc_qn_offset ( arbel_bitmask_t *q_inuse, + unsigned int max_inuse ) { + unsigned int qn_offset = 0; + arbel_bitmask_t mask = 1; + + while ( qn_offset < max_inuse ) { + if ( ( mask & *q_inuse ) == 0 ) { + *q_inuse |= mask; + return qn_offset; + } + qn_offset++; + mask <<= 1; + if ( ! mask ) { + mask = 1; + q_inuse++; + } + } + return -ENFILE; +} + +/** + * Free queue number + * + * @v q_inuse Queue usage bitmask + * @v qn_offset Queue number offset + */ +static void arbel_free_qn_offset ( arbel_bitmask_t *q_inuse, int qn_offset ) { + arbel_bitmask_t mask; + + mask = ( 1 << ( qn_offset % ( 8 * sizeof ( mask ) ) ) ); + q_inuse += ( qn_offset / ( 8 * sizeof ( mask ) ) ); + *q_inuse &= ~mask; +} + +/*************************************************************************** + * + * HCA commands + * + *************************************************************************** + */ + +/** + * Wait for Arbel command completion + * + * @v arbel Arbel device + * @ret rc Return status code + */ +static int arbel_cmd_wait ( struct arbel *arbel, + struct arbelprm_hca_command_register *hcr ) { + unsigned int wait; + + for ( wait = ARBEL_HCR_MAX_WAIT_MS ; wait ; wait-- ) { + hcr->u.dwords[6] = + readl ( arbel->config + ARBEL_HCR_REG ( 6 ) ); + if ( MLX_GET ( hcr, go ) == 0 ) + return 0; + mdelay ( 1 ); + } + return -EBUSY; +} + +/** + * Issue HCA command + * + * @v arbel Arbel device + * @v command Command opcode, flags and input/output lengths + * @v op_mod Opcode modifier (0 if no modifier applicable) + * @v in Input parameters + * @v in_mod Input modifier (0 if no modifier applicable) + * @v out Output parameters + * @ret rc Return status code + */ +static int arbel_cmd ( struct arbel *arbel, unsigned long command, + unsigned int op_mod, const void *in, + unsigned int in_mod, void *out ) { + struct arbelprm_hca_command_register hcr; + unsigned int opcode = ARBEL_HCR_OPCODE ( command ); + size_t in_len = ARBEL_HCR_IN_LEN ( command ); + size_t out_len = ARBEL_HCR_OUT_LEN ( command ); + void *in_buffer; + void *out_buffer; + unsigned int status; + unsigned int i; + int rc; + + assert ( in_len <= ARBEL_MBOX_SIZE ); + assert ( out_len <= ARBEL_MBOX_SIZE ); + + DBGC2 ( arbel, "Arbel %p command %02x in %zx%s out %zx%s\n", + arbel, opcode, in_len, + ( ( command & ARBEL_HCR_IN_MBOX ) ? "(mbox)" : "" ), out_len, + ( ( command & ARBEL_HCR_OUT_MBOX ) ? 
"(mbox)" : "" ) ); + + /* Check that HCR is free */ + if ( ( rc = arbel_cmd_wait ( arbel, &hcr ) ) != 0 ) { + DBGC ( arbel, "Arbel %p command interface locked\n", arbel ); + return rc; + } + + /* Prepare HCR */ + memset ( &hcr, 0, sizeof ( hcr ) ); + in_buffer = &hcr.u.dwords[0]; + if ( in_len && ( command & ARBEL_HCR_IN_MBOX ) ) { + in_buffer = arbel->mailbox_in; + MLX_FILL_1 ( &hcr, 1, in_param_l, virt_to_bus ( in_buffer ) ); + } + memcpy ( in_buffer, in, in_len ); + MLX_FILL_1 ( &hcr, 2, input_modifier, in_mod ); + out_buffer = &hcr.u.dwords[3]; + if ( out_len && ( command & ARBEL_HCR_OUT_MBOX ) ) { + out_buffer = arbel->mailbox_out; + MLX_FILL_1 ( &hcr, 4, out_param_l, + virt_to_bus ( out_buffer ) ); + } + MLX_FILL_3 ( &hcr, 6, + opcode, opcode, + opcode_modifier, op_mod, + go, 1 ); + DBGC2_HD ( arbel, &hcr, sizeof ( hcr ) ); + if ( in_len ) { + DBGC2 ( arbel, "Input:\n" ); + DBGC2_HD ( arbel, in, ( ( in_len < 512 ) ? in_len : 512 ) ); + } + + /* Issue command */ + for ( i = 0 ; i < ( sizeof ( hcr ) / sizeof ( hcr.u.dwords[0] ) ) ; + i++ ) { + writel ( hcr.u.dwords[i], + arbel->config + ARBEL_HCR_REG ( i ) ); + barrier(); + } + + /* Wait for command completion */ + if ( ( rc = arbel_cmd_wait ( arbel, &hcr ) ) != 0 ) { + DBGC ( arbel, "Arbel %p timed out waiting for command:\n", + arbel ); + DBGC_HD ( arbel, &hcr, sizeof ( hcr ) ); + return rc; + } + + /* Check command status */ + status = MLX_GET ( &hcr, status ); + if ( status != 0 ) { + DBGC ( arbel, "Arbel %p command failed with status %02x:\n", + arbel, status ); + DBGC_HD ( arbel, &hcr, sizeof ( hcr ) ); + return -EIO; + } + + /* Read output parameters, if any */ + hcr.u.dwords[3] = readl ( arbel->config + ARBEL_HCR_REG ( 3 ) ); + hcr.u.dwords[4] = readl ( arbel->config + ARBEL_HCR_REG ( 4 ) ); + memcpy ( out, out_buffer, out_len ); + if ( out_len ) { + DBGC2 ( arbel, "Output:\n" ); + DBGC2_HD ( arbel, out, ( ( out_len < 512 ) ? 
out_len : 512 ) ); + } + + return 0; +} + +static inline int +arbel_cmd_query_dev_lim ( struct arbel *arbel, + struct arbelprm_query_dev_lim *dev_lim ) { + return arbel_cmd ( arbel, + ARBEL_HCR_OUT_CMD ( ARBEL_HCR_QUERY_DEV_LIM, + 1, sizeof ( *dev_lim ) ), + 0, NULL, 0, dev_lim ); +} + +static inline int +arbel_cmd_query_fw ( struct arbel *arbel, struct arbelprm_query_fw *fw ) { + return arbel_cmd ( arbel, + ARBEL_HCR_OUT_CMD ( ARBEL_HCR_QUERY_FW, + 1, sizeof ( *fw ) ), + 0, NULL, 0, fw ); +} + +static inline int +arbel_cmd_init_hca ( struct arbel *arbel, + const struct arbelprm_init_hca *init_hca ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_INIT_HCA, + 1, sizeof ( *init_hca ) ), + 0, init_hca, 0, NULL ); +} + +static inline int +arbel_cmd_close_hca ( struct arbel *arbel ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_CLOSE_HCA ), + 0, NULL, 0, NULL ); +} + +static inline int +arbel_cmd_init_ib ( struct arbel *arbel, unsigned int port, + const struct arbelprm_init_ib *init_ib ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_INIT_IB, + 1, sizeof ( *init_ib ) ), + 0, init_ib, port, NULL ); +} + +static inline int +arbel_cmd_close_ib ( struct arbel *arbel, unsigned int port ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_CLOSE_IB ), + 0, NULL, port, NULL ); +} + +static inline int +arbel_cmd_sw2hw_mpt ( struct arbel *arbel, unsigned int index, + const struct arbelprm_mpt *mpt ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_SW2HW_MPT, + 1, sizeof ( *mpt ) ), + 0, mpt, index, NULL ); +} + +static inline int +arbel_cmd_sw2hw_eq ( struct arbel *arbel, unsigned int index, + const struct arbelprm_eqc *eqc ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_SW2HW_EQ, + 1, sizeof ( *eqc ) ), + 0, eqc, index, NULL ); +} + +static inline int +arbel_cmd_hw2sw_eq ( struct arbel *arbel, unsigned int index ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_HW2SW_EQ ), + 1, NULL, index, NULL ); +} + +static inline int +arbel_cmd_sw2hw_cq ( struct arbel *arbel, unsigned long cqn, + const struct arbelprm_completion_queue_context *cqctx ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_SW2HW_CQ, + 1, sizeof ( *cqctx ) ), + 0, cqctx, cqn, NULL ); +} + +static inline int +arbel_cmd_hw2sw_cq ( struct arbel *arbel, unsigned long cqn, + struct arbelprm_completion_queue_context *cqctx) { + return arbel_cmd ( arbel, + ARBEL_HCR_OUT_CMD ( ARBEL_HCR_HW2SW_CQ, + 1, sizeof ( *cqctx ) ), + 0, NULL, cqn, cqctx ); +} + +static inline int +arbel_cmd_rst2init_qpee ( struct arbel *arbel, unsigned long qpn, + const struct arbelprm_qp_ee_state_transitions *ctx ){ + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_RST2INIT_QPEE, + 1, sizeof ( *ctx ) ), + 0, ctx, qpn, NULL ); +} + +static inline int +arbel_cmd_init2rtr_qpee ( struct arbel *arbel, unsigned long qpn, + const struct arbelprm_qp_ee_state_transitions *ctx ){ + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_INIT2RTR_QPEE, + 1, sizeof ( *ctx ) ), + 0, ctx, qpn, NULL ); +} + +static inline int +arbel_cmd_rtr2rts_qpee ( struct arbel *arbel, unsigned long qpn, + const struct arbelprm_qp_ee_state_transitions *ctx ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_RTR2RTS_QPEE, + 1, sizeof ( *ctx ) ), + 0, ctx, qpn, NULL ); +} + +static inline int +arbel_cmd_2rst_qpee ( struct arbel *arbel, unsigned long qpn ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_2RST_QPEE ), + 0x03, NULL, qpn, NULL ); +} + +static 
inline int +arbel_cmd_mad_ifc ( struct arbel *arbel, union arbelprm_mad *mad ) { + return arbel_cmd ( arbel, + ARBEL_HCR_INOUT_CMD ( ARBEL_HCR_MAD_IFC, + 1, sizeof ( *mad ), + 1, sizeof ( *mad ) ), + 0x03, mad, PXE_IB_PORT, mad ); +} + +static inline int +arbel_cmd_read_mgm ( struct arbel *arbel, unsigned int index, + struct arbelprm_mgm_entry *mgm ) { + return arbel_cmd ( arbel, + ARBEL_HCR_OUT_CMD ( ARBEL_HCR_READ_MGM, + 1, sizeof ( *mgm ) ), + 0, NULL, index, mgm ); +} + +static inline int +arbel_cmd_write_mgm ( struct arbel *arbel, unsigned int index, + const struct arbelprm_mgm_entry *mgm ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_WRITE_MGM, + 1, sizeof ( *mgm ) ), + 0, mgm, index, NULL ); +} + +static inline int +arbel_cmd_mgid_hash ( struct arbel *arbel, const struct ib_gid *gid, + struct arbelprm_mgm_hash *hash ) { + return arbel_cmd ( arbel, + ARBEL_HCR_INOUT_CMD ( ARBEL_HCR_MGID_HASH, + 1, sizeof ( *gid ), + 0, sizeof ( *hash ) ), + 0, gid, 0, hash ); +} + +static inline int +arbel_cmd_run_fw ( struct arbel *arbel ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_RUN_FW ), + 0, NULL, 0, NULL ); +} + +static inline int +arbel_cmd_disable_lam ( struct arbel *arbel ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_DISABLE_LAM ), + 0, NULL, 0, NULL ); +} + +static inline int +arbel_cmd_enable_lam ( struct arbel *arbel, struct arbelprm_access_lam *lam ) { + return arbel_cmd ( arbel, + ARBEL_HCR_OUT_CMD ( ARBEL_HCR_ENABLE_LAM, + 1, sizeof ( *lam ) ), + 1, NULL, 0, lam ); +} + +static inline int +arbel_cmd_unmap_icm ( struct arbel *arbel, unsigned int page_count ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_UNMAP_ICM ), + 0, NULL, page_count, NULL ); +} + +static inline int +arbel_cmd_map_icm ( struct arbel *arbel, + const struct arbelprm_virtual_physical_mapping *map ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_MAP_ICM, + 1, sizeof ( *map ) ), + 0, map, 1, NULL ); +} + +static inline int +arbel_cmd_unmap_icm_aux ( struct arbel *arbel ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_UNMAP_ICM_AUX ), + 0, NULL, 0, NULL ); +} + +static inline int +arbel_cmd_map_icm_aux ( struct arbel *arbel, + const struct arbelprm_virtual_physical_mapping *map ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_MAP_ICM_AUX, + 1, sizeof ( *map ) ), + 0, map, 1, NULL ); +} + +static inline int +arbel_cmd_set_icm_size ( struct arbel *arbel, + const struct arbelprm_scalar_parameter *icm_size, + struct arbelprm_scalar_parameter *icm_aux_size ) { + return arbel_cmd ( arbel, + ARBEL_HCR_INOUT_CMD ( ARBEL_HCR_SET_ICM_SIZE, + 0, sizeof ( *icm_size ), + 0, sizeof ( *icm_aux_size ) ), + 0, icm_size, 0, icm_aux_size ); +} + +static inline int +arbel_cmd_unmap_fa ( struct arbel *arbel ) { + return arbel_cmd ( arbel, + ARBEL_HCR_VOID_CMD ( ARBEL_HCR_UNMAP_FA ), + 0, NULL, 0, NULL ); +} + +static inline int +arbel_cmd_map_fa ( struct arbel *arbel, + const struct arbelprm_virtual_physical_mapping *map ) { + return arbel_cmd ( arbel, + ARBEL_HCR_IN_CMD ( ARBEL_HCR_MAP_FA, + 1, sizeof ( *map ) ), + 0, map, 1, NULL ); +} + +/*************************************************************************** + * + * Completion queue operations + * + *************************************************************************** + */ + +/** + * Create completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + * @ret rc Return status code + */ +static int arbel_create_cq ( struct ib_device *ibdev, + 
struct ib_completion_queue *cq ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbel_completion_queue *arbel_cq; + struct arbelprm_completion_queue_context cqctx; + struct arbelprm_cq_ci_db_record *ci_db_rec; + struct arbelprm_cq_arm_db_record *arm_db_rec; + int cqn_offset; + unsigned int i; + int rc; + + /* Find a free completion queue number */ + cqn_offset = arbel_alloc_qn_offset ( arbel->cq_inuse, ARBEL_MAX_CQS ); + if ( cqn_offset < 0 ) { + DBGC ( arbel, "Arbel %p out of completion queues\n", arbel ); + rc = cqn_offset; + goto err_cqn_offset; + } + cq->cqn = ( arbel->limits.reserved_cqs + cqn_offset ); + + /* Allocate control structures */ + arbel_cq = zalloc ( sizeof ( *arbel_cq ) ); + if ( ! arbel_cq ) { + rc = -ENOMEM; + goto err_arbel_cq; + } + arbel_cq->ci_doorbell_idx = arbel_cq_ci_doorbell_idx ( cqn_offset ); + arbel_cq->arm_doorbell_idx = arbel_cq_arm_doorbell_idx ( cqn_offset ); + + /* Allocate completion queue itself */ + arbel_cq->cqe_size = ( cq->num_cqes * sizeof ( arbel_cq->cqe[0] ) ); + arbel_cq->cqe = malloc_dma ( arbel_cq->cqe_size, + sizeof ( arbel_cq->cqe[0] ) ); + if ( ! arbel_cq->cqe ) { + rc = -ENOMEM; + goto err_cqe; + } + memset ( arbel_cq->cqe, 0, arbel_cq->cqe_size ); + for ( i = 0 ; i < cq->num_cqes ; i++ ) { + MLX_FILL_1 ( &arbel_cq->cqe[i].normal, 7, owner, 1 ); + } + barrier(); + + /* Initialise doorbell records */ + ci_db_rec = &arbel->db_rec[arbel_cq->ci_doorbell_idx].cq_ci; + MLX_FILL_1 ( ci_db_rec, 0, counter, 0 ); + MLX_FILL_2 ( ci_db_rec, 1, + res, ARBEL_UAR_RES_CQ_CI, + cq_number, cq->cqn ); + arm_db_rec = &arbel->db_rec[arbel_cq->arm_doorbell_idx].cq_arm; + MLX_FILL_1 ( arm_db_rec, 0, counter, 0 ); + MLX_FILL_2 ( arm_db_rec, 1, + res, ARBEL_UAR_RES_CQ_ARM, + cq_number, cq->cqn ); + + /* Hand queue over to hardware */ + memset ( &cqctx, 0, sizeof ( cqctx ) ); + MLX_FILL_1 ( &cqctx, 0, st, 0xa /* "Event fired" */ ); + MLX_FILL_1 ( &cqctx, 2, start_address_l, + virt_to_bus ( arbel_cq->cqe ) ); + MLX_FILL_2 ( &cqctx, 3, + usr_page, arbel->limits.reserved_uars, + log_cq_size, fls ( cq->num_cqes - 1 ) ); + MLX_FILL_1 ( &cqctx, 5, c_eqn, ARBEL_NO_EQ ); + MLX_FILL_1 ( &cqctx, 6, pd, ARBEL_GLOBAL_PD ); + MLX_FILL_1 ( &cqctx, 7, l_key, arbel->reserved_lkey ); + MLX_FILL_1 ( &cqctx, 12, cqn, cq->cqn ); + MLX_FILL_1 ( &cqctx, 13, + cq_ci_db_record, arbel_cq->ci_doorbell_idx ); + MLX_FILL_1 ( &cqctx, 14, + cq_state_db_record, arbel_cq->arm_doorbell_idx ); + if ( ( rc = arbel_cmd_sw2hw_cq ( arbel, cq->cqn, &cqctx ) ) != 0 ) { + DBGC ( arbel, "Arbel %p SW2HW_CQ failed: %s\n", + arbel, strerror ( rc ) ); + goto err_sw2hw_cq; + } + + DBGC ( arbel, "Arbel %p CQN %#lx ring at [%p,%p)\n", + arbel, cq->cqn, arbel_cq->cqe, + ( ( ( void * ) arbel_cq->cqe ) + arbel_cq->cqe_size ) ); + cq->dev_priv = arbel_cq; + return 0; + + err_sw2hw_cq: + MLX_FILL_1 ( ci_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + MLX_FILL_1 ( arm_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + free_dma ( arbel_cq->cqe, arbel_cq->cqe_size ); + err_cqe: + free ( arbel_cq ); + err_arbel_cq: + arbel_free_qn_offset ( arbel->cq_inuse, cqn_offset ); + err_cqn_offset: + return rc; +} + +/** + * Destroy completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + */ +static void arbel_destroy_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbel_completion_queue *arbel_cq = cq->dev_priv; + struct arbelprm_completion_queue_context cqctx; + struct arbelprm_cq_ci_db_record *ci_db_rec; + struct arbelprm_cq_arm_db_record *arm_db_rec; 
+ int cqn_offset; + int rc; + + /* Take ownership back from hardware */ + if ( ( rc = arbel_cmd_hw2sw_cq ( arbel, cq->cqn, &cqctx ) ) != 0 ) { + DBGC ( arbel, "Arbel %p FATAL HW2SW_CQ failed on CQN %#lx: " + "%s\n", arbel, cq->cqn, strerror ( rc ) ); + /* Leak memory and return; at least we avoid corruption */ + return; + } + + /* Clear doorbell records */ + ci_db_rec = &arbel->db_rec[arbel_cq->ci_doorbell_idx].cq_ci; + arm_db_rec = &arbel->db_rec[arbel_cq->arm_doorbell_idx].cq_arm; + MLX_FILL_1 ( ci_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + MLX_FILL_1 ( arm_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + + /* Free memory */ + free_dma ( arbel_cq->cqe, arbel_cq->cqe_size ); + free ( arbel_cq ); + + /* Mark queue number as free */ + cqn_offset = ( cq->cqn - arbel->limits.reserved_cqs ); + arbel_free_qn_offset ( arbel->cq_inuse, cqn_offset ); + + cq->dev_priv = NULL; +} + +/*************************************************************************** + * + * Queue pair operations + * + *************************************************************************** + */ + +/** + * Create send work queue + * + * @v arbel_send_wq Send work queue + * @v num_wqes Number of work queue entries + * @ret rc Return status code + */ +static int arbel_create_send_wq ( struct arbel_send_work_queue *arbel_send_wq, + unsigned int num_wqes ) { + struct arbelprm_ud_send_wqe *wqe; + struct arbelprm_ud_send_wqe *next_wqe; + unsigned int wqe_idx_mask; + unsigned int i; + + /* Allocate work queue */ + arbel_send_wq->wqe_size = ( num_wqes * + sizeof ( arbel_send_wq->wqe[0] ) ); + arbel_send_wq->wqe = malloc_dma ( arbel_send_wq->wqe_size, + sizeof ( arbel_send_wq->wqe[0] ) ); + if ( ! arbel_send_wq->wqe ) + return -ENOMEM; + memset ( arbel_send_wq->wqe, 0, arbel_send_wq->wqe_size ); + + /* Link work queue entries */ + wqe_idx_mask = ( num_wqes - 1 ); + for ( i = 0 ; i < num_wqes ; i++ ) { + wqe = &arbel_send_wq->wqe[i].ud; + next_wqe = &arbel_send_wq->wqe[ ( i + 1 ) & wqe_idx_mask ].ud; + MLX_FILL_1 ( &wqe->next, 0, nda_31_6, + ( virt_to_bus ( next_wqe ) >> 6 ) ); + } + + return 0; +} + +/** + * Create receive work queue + * + * @v arbel_recv_wq Receive work queue + * @v num_wqes Number of work queue entries + * @ret rc Return status code + */ +static int arbel_create_recv_wq ( struct arbel_recv_work_queue *arbel_recv_wq, + unsigned int num_wqes ) { + struct arbelprm_recv_wqe *wqe; + struct arbelprm_recv_wqe *next_wqe; + unsigned int wqe_idx_mask; + size_t nds; + unsigned int i; + unsigned int j; + + /* Allocate work queue */ + arbel_recv_wq->wqe_size = ( num_wqes * + sizeof ( arbel_recv_wq->wqe[0] ) ); + arbel_recv_wq->wqe = malloc_dma ( arbel_recv_wq->wqe_size, + sizeof ( arbel_recv_wq->wqe[0] ) ); + if ( ! 
arbel_recv_wq->wqe ) + return -ENOMEM; + memset ( arbel_recv_wq->wqe, 0, arbel_recv_wq->wqe_size ); + + /* Link work queue entries */ + wqe_idx_mask = ( num_wqes - 1 ); + nds = ( ( offsetof ( typeof ( *wqe ), data ) + + sizeof ( wqe->data[0] ) ) >> 4 ); + for ( i = 0 ; i < num_wqes ; i++ ) { + wqe = &arbel_recv_wq->wqe[i].recv; + next_wqe = &arbel_recv_wq->wqe[( i + 1 ) & wqe_idx_mask].recv; + MLX_FILL_1 ( &wqe->next, 0, nda_31_6, + ( virt_to_bus ( next_wqe ) >> 6 ) ); + MLX_FILL_1 ( &wqe->next, 1, nds, ( sizeof ( *wqe ) / 16 ) ); + for ( j = 0 ; ( ( ( void * ) &wqe->data[j] ) < + ( ( void * ) ( wqe + 1 ) ) ) ; j++ ) { + MLX_FILL_1 ( &wqe->data[j], 1, + l_key, ARBEL_INVALID_LKEY ); + } + } + + return 0; +} + +/** + * Create queue pair + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @ret rc Return status code + */ +static int arbel_create_qp ( struct ib_device *ibdev, + struct ib_queue_pair *qp ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbel_queue_pair *arbel_qp; + struct arbelprm_qp_ee_state_transitions qpctx; + struct arbelprm_qp_db_record *send_db_rec; + struct arbelprm_qp_db_record *recv_db_rec; + int qpn_offset; + int rc; + + /* Find a free queue pair number */ + qpn_offset = arbel_alloc_qn_offset ( arbel->qp_inuse, ARBEL_MAX_QPS ); + if ( qpn_offset < 0 ) { + DBGC ( arbel, "Arbel %p out of queue pairs\n", arbel ); + rc = qpn_offset; + goto err_qpn_offset; + } + qp->qpn = ( ARBEL_QPN_BASE + arbel->limits.reserved_qps + qpn_offset ); + + /* Allocate control structures */ + arbel_qp = zalloc ( sizeof ( *arbel_qp ) ); + if ( ! arbel_qp ) { + rc = -ENOMEM; + goto err_arbel_qp; + } + arbel_qp->send.doorbell_idx = arbel_send_doorbell_idx ( qpn_offset ); + arbel_qp->recv.doorbell_idx = arbel_recv_doorbell_idx ( qpn_offset ); + + /* Create send and receive work queues */ + if ( ( rc = arbel_create_send_wq ( &arbel_qp->send, + qp->send.num_wqes ) ) != 0 ) + goto err_create_send_wq; + if ( ( rc = arbel_create_recv_wq ( &arbel_qp->recv, + qp->recv.num_wqes ) ) != 0 ) + goto err_create_recv_wq; + + /* Initialise doorbell records */ + send_db_rec = &arbel->db_rec[arbel_qp->send.doorbell_idx].qp; + MLX_FILL_1 ( send_db_rec, 0, counter, 0 ); + MLX_FILL_2 ( send_db_rec, 1, + res, ARBEL_UAR_RES_SQ, + qp_number, qp->qpn ); + recv_db_rec = &arbel->db_rec[arbel_qp->recv.doorbell_idx].qp; + MLX_FILL_1 ( recv_db_rec, 0, counter, 0 ); + MLX_FILL_2 ( recv_db_rec, 1, + res, ARBEL_UAR_RES_RQ, + qp_number, qp->qpn ); + + /* Hand queue over to hardware */ + memset ( &qpctx, 0, sizeof ( qpctx ) ); + MLX_FILL_3 ( &qpctx, 2, + qpc_eec_data.de, 1, + qpc_eec_data.pm_state, 0x03 /* Always 0x03 for UD */, + qpc_eec_data.st, ARBEL_ST_UD ); + MLX_FILL_6 ( &qpctx, 4, + qpc_eec_data.mtu, ARBEL_MTU_2048, + qpc_eec_data.msg_max, 11 /* 2^11 = 2048 */, + qpc_eec_data.log_rq_size, fls ( qp->recv.num_wqes - 1 ), + qpc_eec_data.log_rq_stride, + ( fls ( sizeof ( arbel_qp->recv.wqe[0] ) - 1 ) - 4 ), + qpc_eec_data.log_sq_size, fls ( qp->send.num_wqes - 1 ), + qpc_eec_data.log_sq_stride, + ( fls ( sizeof ( arbel_qp->send.wqe[0] ) - 1 ) - 4 ) ); + MLX_FILL_1 ( &qpctx, 5, + qpc_eec_data.usr_page, arbel->limits.reserved_uars ); + MLX_FILL_1 ( &qpctx, 10, qpc_eec_data.primary_address_path.port_number, + PXE_IB_PORT ); + MLX_FILL_1 ( &qpctx, 27, qpc_eec_data.pd, ARBEL_GLOBAL_PD ); + MLX_FILL_1 ( &qpctx, 29, qpc_eec_data.wqe_lkey, arbel->reserved_lkey ); + MLX_FILL_1 ( &qpctx, 30, qpc_eec_data.ssc, 1 ); + MLX_FILL_1 ( &qpctx, 33, qpc_eec_data.cqn_snd, qp->send.cq->cqn ); + MLX_FILL_1 ( &qpctx, 34, 
qpc_eec_data.snd_wqe_base_adr_l, + ( virt_to_bus ( arbel_qp->send.wqe ) >> 6 ) ); + MLX_FILL_1 ( &qpctx, 35, qpc_eec_data.snd_db_record_index, + arbel_qp->send.doorbell_idx ); + MLX_FILL_1 ( &qpctx, 38, qpc_eec_data.rsc, 1 ); + MLX_FILL_1 ( &qpctx, 41, qpc_eec_data.cqn_rcv, qp->recv.cq->cqn ); + MLX_FILL_1 ( &qpctx, 42, qpc_eec_data.rcv_wqe_base_adr_l, + ( virt_to_bus ( arbel_qp->recv.wqe ) >> 6 ) ); + MLX_FILL_1 ( &qpctx, 43, qpc_eec_data.rcv_db_record_index, + arbel_qp->recv.doorbell_idx ); + MLX_FILL_1 ( &qpctx, 44, qpc_eec_data.q_key, qp->qkey ); + if ( ( rc = arbel_cmd_rst2init_qpee ( arbel, qp->qpn, &qpctx )) != 0 ){ + DBGC ( arbel, "Arbel %p RST2INIT_QPEE failed: %s\n", + arbel, strerror ( rc ) ); + goto err_rst2init_qpee; + } + memset ( &qpctx, 0, sizeof ( qpctx ) ); + MLX_FILL_2 ( &qpctx, 4, + qpc_eec_data.mtu, ARBEL_MTU_2048, + qpc_eec_data.msg_max, 11 /* 2^11 = 2048 */ ); + if ( ( rc = arbel_cmd_init2rtr_qpee ( arbel, qp->qpn, &qpctx )) != 0 ){ + DBGC ( arbel, "Arbel %p INIT2RTR_QPEE failed: %s\n", + arbel, strerror ( rc ) ); + goto err_init2rtr_qpee; + } + memset ( &qpctx, 0, sizeof ( qpctx ) ); + if ( ( rc = arbel_cmd_rtr2rts_qpee ( arbel, qp->qpn, &qpctx ) ) != 0 ){ + DBGC ( arbel, "Arbel %p RTR2RTS_QPEE failed: %s\n", + arbel, strerror ( rc ) ); + goto err_rtr2rts_qpee; + } + + DBGC ( arbel, "Arbel %p QPN %#lx send ring at [%p,%p)\n", + arbel, qp->qpn, arbel_qp->send.wqe, + ( ( (void *) arbel_qp->send.wqe ) + arbel_qp->send.wqe_size ) ); + DBGC ( arbel, "Arbel %p QPN %#lx receive ring at [%p,%p)\n", + arbel, qp->qpn, arbel_qp->recv.wqe, + ( ( (void *) arbel_qp->recv.wqe ) + arbel_qp->recv.wqe_size ) ); + qp->dev_priv = arbel_qp; + return 0; + + err_rtr2rts_qpee: + err_init2rtr_qpee: + arbel_cmd_2rst_qpee ( arbel, qp->qpn ); + err_rst2init_qpee: + MLX_FILL_1 ( send_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + MLX_FILL_1 ( recv_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + free_dma ( arbel_qp->recv.wqe, arbel_qp->recv.wqe_size ); + err_create_recv_wq: + free_dma ( arbel_qp->send.wqe, arbel_qp->send.wqe_size ); + err_create_send_wq: + free ( arbel_qp ); + err_arbel_qp: + arbel_free_qn_offset ( arbel->qp_inuse, qpn_offset ); + err_qpn_offset: + return rc; +} + +/** + * Destroy queue pair + * + * @v ibdev Infiniband device + * @v qp Queue pair + */ +static void arbel_destroy_qp ( struct ib_device *ibdev, + struct ib_queue_pair *qp ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbel_queue_pair *arbel_qp = qp->dev_priv; + struct arbelprm_qp_db_record *send_db_rec; + struct arbelprm_qp_db_record *recv_db_rec; + int qpn_offset; + int rc; + + /* Take ownership back from hardware */ + if ( ( rc = arbel_cmd_2rst_qpee ( arbel, qp->qpn ) ) != 0 ) { + DBGC ( arbel, "Arbel %p FATAL 2RST_QPEE failed on QPN %#lx: " + "%s\n", arbel, qp->qpn, strerror ( rc ) ); + /* Leak memory and return; at least we avoid corruption */ + return; + } + + /* Clear doorbell records */ + send_db_rec = &arbel->db_rec[arbel_qp->send.doorbell_idx].qp; + recv_db_rec = &arbel->db_rec[arbel_qp->recv.doorbell_idx].qp; + MLX_FILL_1 ( send_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + MLX_FILL_1 ( recv_db_rec, 1, res, ARBEL_UAR_RES_NONE ); + + /* Free memory */ + free_dma ( arbel_qp->send.wqe, arbel_qp->send.wqe_size ); + free_dma ( arbel_qp->recv.wqe, arbel_qp->recv.wqe_size ); + free ( arbel_qp ); + + /* Mark queue number as free */ + qpn_offset = ( qp->qpn - ARBEL_QPN_BASE - arbel->limits.reserved_qps ); + arbel_free_qn_offset ( arbel->qp_inuse, qpn_offset ); + + qp->dev_priv = NULL; +} + 
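The create/destroy paths above hand out queue numbers through the arbel_alloc_qn_offset()/arbel_free_qn_offset() bitmask helpers defined earlier in this file, and arbel_destroy_qp() returns the offset so a later arbel_create_qp() can reuse it. A minimal standalone sketch of that bookkeeping follows (assumptions: qn_alloc(), qn_free() and BITS_PER_MASK are illustrative stand-ins, and arbel_bitmask_t is substituted with a plain unsigned int, since its real definition lives in arbel.h and is not part of this hunk):

#include <assert.h>
#include <stdio.h>

/* Stand-in for the arbel_bitmask_t type declared in arbel.h (assumption) */
typedef unsigned int arbel_bitmask_t;
#define BITS_PER_MASK ( 8 * sizeof ( arbel_bitmask_t ) )

/* Same walk as arbel_alloc_qn_offset(): find the lowest clear bit and set it */
static int qn_alloc ( arbel_bitmask_t *q_inuse, unsigned int max_inuse ) {
	unsigned int qn_offset;

	for ( qn_offset = 0 ; qn_offset < max_inuse ; qn_offset++ ) {
		arbel_bitmask_t mask =
			( 1U << ( qn_offset % BITS_PER_MASK ) );
		if ( ! ( q_inuse[ qn_offset / BITS_PER_MASK ] & mask ) ) {
			q_inuse[ qn_offset / BITS_PER_MASK ] |= mask;
			return qn_offset;
		}
	}
	return -1;	/* the driver returns -ENFILE here */
}

/* Same as arbel_free_qn_offset(): clear the bit so the offset can be reused */
static void qn_free ( arbel_bitmask_t *q_inuse, int qn_offset ) {
	q_inuse[ qn_offset / BITS_PER_MASK ] &=
		~( 1U << ( qn_offset % BITS_PER_MASK ) );
}

int main ( void ) {
	arbel_bitmask_t qp_inuse[2] = { 0, 0 };

	int a = qn_alloc ( qp_inuse, 64 );	/* offset 0 */
	int b = qn_alloc ( qp_inuse, 64 );	/* offset 1 */
	printf ( "allocated offsets %d and %d\n", a, b );

	/* arbel_create_qp() derives the actual QPN from the offset as
	 * ( ARBEL_QPN_BASE + arbel->limits.reserved_qps + offset );
	 * arbel_destroy_qp() subtracts the same quantities before freeing.
	 */
	qn_free ( qp_inuse, a );			/* destroy_qp path */
	assert ( qn_alloc ( qp_inuse, 64 ) == a );	/* offset is reused */
	return 0;
}

The same pattern backs the completion queue numbering in arbel_create_cq()/arbel_destroy_cq() above, using cq_inuse and the device's reserved-CQ count instead.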
+/*************************************************************************** + * + * Work request operations + * + *************************************************************************** + */ + +/** + * Ring doorbell register in UAR + * + * @v arbel Arbel device + * @v db_reg Doorbell register structure + * @v offset Address of doorbell + */ +static void arbel_ring_doorbell ( struct arbel *arbel, + union arbelprm_doorbell_register *db_reg, + unsigned int offset ) { + + DBGC2 ( arbel, "Arbel %p ringing doorbell %08lx:%08lx at %lx\n", + arbel, db_reg->dword[0], db_reg->dword[1], + virt_to_phys ( arbel->uar + offset ) ); + + barrier(); + writel ( db_reg->dword[0], ( arbel->uar + offset + 0 ) ); + barrier(); + writel ( db_reg->dword[1], ( arbel->uar + offset + 4 ) ); +} + +/** GID used for GID-less send work queue entries */ +static const struct ib_gid arbel_no_gid = { + { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0 } } +}; + +/** + * Post send work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v av Address vector + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static int arbel_post_send ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct ib_address_vector *av, + struct io_buffer *iobuf ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbel_queue_pair *arbel_qp = qp->dev_priv; + struct ib_work_queue *wq = &qp->send; + struct arbel_send_work_queue *arbel_send_wq = &arbel_qp->send; + struct arbelprm_ud_send_wqe *prev_wqe; + struct arbelprm_ud_send_wqe *wqe; + struct arbelprm_qp_db_record *qp_db_rec; + union arbelprm_doorbell_register db_reg; + const struct ib_gid *gid; + unsigned int wqe_idx_mask; + size_t nds; + + /* Allocate work queue entry */ + wqe_idx_mask = ( wq->num_wqes - 1 ); + if ( wq->iobufs[wq->next_idx & wqe_idx_mask] ) { + DBGC ( arbel, "Arbel %p send queue full", arbel ); + return -ENOBUFS; + } + wq->iobufs[wq->next_idx & wqe_idx_mask] = iobuf; + prev_wqe = &arbel_send_wq->wqe[(wq->next_idx - 1) & wqe_idx_mask].ud; + wqe = &arbel_send_wq->wqe[wq->next_idx & wqe_idx_mask].ud; + + /* Construct work queue entry */ + MLX_FILL_1 ( &wqe->next, 1, always1, 1 ); + memset ( &wqe->ctrl, 0, sizeof ( wqe->ctrl ) ); + MLX_FILL_1 ( &wqe->ctrl, 0, always1, 1 ); + memset ( &wqe->ud, 0, sizeof ( wqe->ud ) ); + MLX_FILL_2 ( &wqe->ud, 0, + ud_address_vector.pd, ARBEL_GLOBAL_PD, + ud_address_vector.port_number, PXE_IB_PORT ); + MLX_FILL_2 ( &wqe->ud, 1, + ud_address_vector.rlid, av->dlid, + ud_address_vector.g, av->gid_present ); + MLX_FILL_2 ( &wqe->ud, 2, + ud_address_vector.max_stat_rate, + ( ( av->rate >= 3 ) ? 0 : 1 ), + ud_address_vector.msg, 3 ); + MLX_FILL_1 ( &wqe->ud, 3, ud_address_vector.sl, av->sl ); + gid = ( av->gid_present ? 
&av->gid : &arbel_no_gid ); + memcpy ( &wqe->ud.u.dwords[4], gid, sizeof ( *gid ) ); + MLX_FILL_1 ( &wqe->ud, 8, destination_qp, av->dest_qp ); + MLX_FILL_1 ( &wqe->ud, 9, q_key, av->qkey ); + MLX_FILL_1 ( &wqe->data[0], 0, byte_count, iob_len ( iobuf ) ); + MLX_FILL_1 ( &wqe->data[0], 1, l_key, arbel->reserved_lkey ); + MLX_FILL_1 ( &wqe->data[0], 3, + local_address_l, virt_to_bus ( iobuf->data ) ); + + /* Update previous work queue entry's "next" field */ + nds = ( ( offsetof ( typeof ( *wqe ), data ) + + sizeof ( wqe->data[0] ) ) >> 4 ); + MLX_SET ( &prev_wqe->next, nopcode, ARBEL_OPCODE_SEND ); + MLX_FILL_3 ( &prev_wqe->next, 1, + nds, nds, + f, 1, + always1, 1 ); + + /* Update doorbell record */ + barrier(); + qp_db_rec = &arbel->db_rec[arbel_send_wq->doorbell_idx].qp; + MLX_FILL_1 ( qp_db_rec, 0, + counter, ( ( wq->next_idx + 1 ) & 0xffff ) ); + + /* Ring doorbell register */ + MLX_FILL_4 ( &db_reg.send, 0, + nopcode, ARBEL_OPCODE_SEND, + f, 1, + wqe_counter, ( wq->next_idx & 0xffff ), + wqe_cnt, 1 ); + MLX_FILL_2 ( &db_reg.send, 1, + nds, nds, + qpn, qp->qpn ); + arbel_ring_doorbell ( arbel, &db_reg, ARBEL_DB_POST_SND_OFFSET ); + + /* Update work queue's index */ + wq->next_idx++; + + return 0; +} + +/** + * Post receive work queue entry + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v iobuf I/O buffer + * @ret rc Return status code + */ +static int arbel_post_recv ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct io_buffer *iobuf ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbel_queue_pair *arbel_qp = qp->dev_priv; + struct ib_work_queue *wq = &qp->recv; + struct arbel_recv_work_queue *arbel_recv_wq = &arbel_qp->recv; + struct arbelprm_recv_wqe *wqe; + union arbelprm_doorbell_record *db_rec; + unsigned int wqe_idx_mask; + + /* Allocate work queue entry */ + wqe_idx_mask = ( wq->num_wqes - 1 ); + if ( wq->iobufs[wq->next_idx & wqe_idx_mask] ) { + DBGC ( arbel, "Arbel %p receive queue full", arbel ); + return -ENOBUFS; + } + wq->iobufs[wq->next_idx & wqe_idx_mask] = iobuf; + wqe = &arbel_recv_wq->wqe[wq->next_idx & wqe_idx_mask].recv; + + /* Construct work queue entry */ + MLX_FILL_1 ( &wqe->data[0], 0, byte_count, iob_tailroom ( iobuf ) ); + MLX_FILL_1 ( &wqe->data[0], 1, l_key, arbel->reserved_lkey ); + MLX_FILL_1 ( &wqe->data[0], 3, + local_address_l, virt_to_bus ( iobuf->data ) ); + + /* Update doorbell record */ + barrier(); + db_rec = &arbel->db_rec[arbel_recv_wq->doorbell_idx]; + MLX_FILL_1 ( &db_rec->qp, 0, + counter, ( ( wq->next_idx + 1 ) & 0xffff ) ); + + /* Update work queue's index */ + wq->next_idx++; + + return 0; +} + +/** + * Handle completion + * + * @v ibdev Infiniband device + * @v cq Completion queue + * @v cqe Hardware completion queue entry + * @v complete_send Send completion handler + * @v complete_recv Receive completion handler + * @ret rc Return status code + */ +static int arbel_complete ( struct ib_device *ibdev, + struct ib_completion_queue *cq, + union arbelprm_completion_entry *cqe, + ib_completer_t complete_send, + ib_completer_t complete_recv ) { + struct arbel *arbel = ibdev->dev_priv; + struct ib_completion completion; + struct ib_work_queue *wq; + struct ib_queue_pair *qp; + struct arbel_queue_pair *arbel_qp; + struct arbel_send_work_queue *arbel_send_wq; + struct arbel_recv_work_queue *arbel_recv_wq; + struct arbelprm_recv_wqe *recv_wqe; + struct io_buffer *iobuf; + ib_completer_t complete; + unsigned int opcode; + unsigned long qpn; + int is_send; + unsigned long wqe_adr; + unsigned int wqe_idx; + int rc = 
0; + + /* Parse completion */ + memset ( &completion, 0, sizeof ( completion ) ); + qpn = MLX_GET ( &cqe->normal, my_qpn ); + is_send = MLX_GET ( &cqe->normal, s ); + wqe_adr = ( MLX_GET ( &cqe->normal, wqe_adr ) << 6 ); + opcode = MLX_GET ( &cqe->normal, opcode ); + if ( opcode >= ARBEL_OPCODE_RECV_ERROR ) { + /* "s" field is not valid for error opcodes */ + is_send = ( opcode == ARBEL_OPCODE_SEND_ERROR ); + completion.syndrome = MLX_GET ( &cqe->error, syndrome ); + DBGC ( arbel, "Arbel %p CQN %lx syndrome %x vendor %lx\n", + arbel, cq->cqn, completion.syndrome, + MLX_GET ( &cqe->error, vendor_code ) ); + rc = -EIO; + /* Don't return immediately; propagate error to completer */ + } + + /* Identify work queue */ + wq = ib_find_wq ( cq, qpn, is_send ); + if ( ! wq ) { + DBGC ( arbel, "Arbel %p CQN %lx unknown %s QPN %lx\n", + arbel, cq->cqn, ( is_send ? "send" : "recv" ), qpn ); + return -EIO; + } + qp = wq->qp; + arbel_qp = qp->dev_priv; + arbel_send_wq = &arbel_qp->send; + arbel_recv_wq = &arbel_qp->recv; + + /* Identify work queue entry index */ + if ( is_send ) { + wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_send_wq->wqe ) ) / + sizeof ( arbel_send_wq->wqe[0] ) ); + assert ( wqe_idx < qp->send.num_wqes ); + } else { + wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_recv_wq->wqe ) ) / + sizeof ( arbel_recv_wq->wqe[0] ) ); + assert ( wqe_idx < qp->recv.num_wqes ); + } + + /* Identify I/O buffer */ + iobuf = wq->iobufs[wqe_idx]; + if ( ! iobuf ) { + DBGC ( arbel, "Arbel %p CQN %lx QPN %lx empty WQE %x\n", + arbel, cq->cqn, qpn, wqe_idx ); + return -EIO; + } + wq->iobufs[wqe_idx] = NULL; + + /* Fill in length for received packets */ + if ( ! is_send ) { + completion.len = MLX_GET ( &cqe->normal, byte_cnt ); + recv_wqe = &arbel_recv_wq->wqe[wqe_idx].recv; + assert ( MLX_GET ( &recv_wqe->data[0], local_address_l ) == + virt_to_bus ( iobuf->data ) ); + assert ( MLX_GET ( &recv_wqe->data[0], byte_count ) == + iob_tailroom ( iobuf ) ); + MLX_FILL_1 ( &recv_wqe->data[0], 0, byte_count, 0 ); + MLX_FILL_1 ( &recv_wqe->data[0], 1, + l_key, ARBEL_INVALID_LKEY ); + if ( completion.len > iob_tailroom ( iobuf ) ) { + DBGC ( arbel, "Arbel %p CQN %lx QPN %lx IDX %x " + "overlength received packet length %zd\n", + arbel, cq->cqn, qpn, wqe_idx, completion.len ); + return -EIO; + } + } + + /* Pass off to caller's completion handler */ + complete = ( is_send ? 
complete_send : complete_recv ); + complete ( ibdev, qp, &completion, iobuf ); + + return rc; +} + +/** + * Poll completion queue + * + * @v ibdev Infiniband device + * @v cq Completion queue + * @v complete_send Send completion handler + * @v complete_recv Receive completion handler + */ +static void arbel_poll_cq ( struct ib_device *ibdev, + struct ib_completion_queue *cq, + ib_completer_t complete_send, + ib_completer_t complete_recv ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbel_completion_queue *arbel_cq = cq->dev_priv; + struct arbelprm_cq_ci_db_record *ci_db_rec; + union arbelprm_completion_entry *cqe; + unsigned int cqe_idx_mask; + int rc; + + while ( 1 ) { + /* Look for completion entry */ + cqe_idx_mask = ( cq->num_cqes - 1 ); + cqe = &arbel_cq->cqe[cq->next_idx & cqe_idx_mask]; + if ( MLX_GET ( &cqe->normal, owner ) != 0 ) { + /* Entry still owned by hardware; end of poll */ + break; + } + + /* Handle completion */ + if ( ( rc = arbel_complete ( ibdev, cq, cqe, complete_send, + complete_recv ) ) != 0 ) { + DBGC ( arbel, "Arbel %p failed to complete: %s\n", + arbel, strerror ( rc ) ); + DBGC_HD ( arbel, cqe, sizeof ( *cqe ) ); + } + + /* Return ownership to hardware */ + MLX_FILL_1 ( &cqe->normal, 7, owner, 1 ); + barrier(); + /* Update completion queue's index */ + cq->next_idx++; + /* Update doorbell record */ + ci_db_rec = &arbel->db_rec[arbel_cq->ci_doorbell_idx].cq_ci; + MLX_FILL_1 ( ci_db_rec, 0, + counter, ( cq->next_idx & 0xffffffffUL ) ); + } +} + +/*************************************************************************** + * + * Multicast group operations + * + *************************************************************************** + */ + +/** + * Attach to multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + * @ret rc Return status code + */ +static int arbel_mcast_attach ( struct ib_device *ibdev, + struct ib_queue_pair *qp, + struct ib_gid *gid ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbelprm_mgm_hash hash; + struct arbelprm_mgm_entry mgm; + unsigned int index; + int rc; + + /* Generate hash table index */ + if ( ( rc = arbel_cmd_mgid_hash ( arbel, gid, &hash ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not hash GID: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + index = MLX_GET ( &hash, hash ); + + /* Check for existing hash table entry */ + if ( ( rc = arbel_cmd_read_mgm ( arbel, index, &mgm ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not read MGM %#x: %s\n", + arbel, index, strerror ( rc ) ); + return rc; + } + if ( MLX_GET ( &mgm, mgmqp_0.qi ) != 0 ) { + /* FIXME: this implementation allows only a single QP + * per multicast group, and doesn't handle hash + * collisions. Sufficient for IPoIB but may need to + * be extended in future. 
+ */ + DBGC ( arbel, "Arbel %p MGID index %#x already in use\n", + arbel, index ); + return -EBUSY; + } + + /* Update hash table entry */ + MLX_FILL_2 ( &mgm, 8, + mgmqp_0.qpn_i, qp->qpn, + mgmqp_0.qi, 1 ); + memcpy ( &mgm.u.dwords[4], gid, sizeof ( *gid ) ); + if ( ( rc = arbel_cmd_write_mgm ( arbel, index, &mgm ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not write MGM %#x: %s\n", + arbel, index, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Detach from multicast group + * + * @v ibdev Infiniband device + * @v qp Queue pair + * @v gid Multicast GID + */ +static void arbel_mcast_detach ( struct ib_device *ibdev, + struct ib_queue_pair *qp __unused, + struct ib_gid *gid ) { + struct arbel *arbel = ibdev->dev_priv; + struct arbelprm_mgm_hash hash; + struct arbelprm_mgm_entry mgm; + unsigned int index; + int rc; + + /* Generate hash table index */ + if ( ( rc = arbel_cmd_mgid_hash ( arbel, gid, &hash ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not hash GID: %s\n", + arbel, strerror ( rc ) ); + return; + } + index = MLX_GET ( &hash, hash ); + + /* Clear hash table entry */ + memset ( &mgm, 0, sizeof ( mgm ) ); + if ( ( rc = arbel_cmd_write_mgm ( arbel, index, &mgm ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not write MGM %#x: %s\n", + arbel, index, strerror ( rc ) ); + return; + } +} + +/** Arbel Infiniband operations */ +static struct ib_device_operations arbel_ib_operations = { + .create_cq = arbel_create_cq, + .destroy_cq = arbel_destroy_cq, + .create_qp = arbel_create_qp, + .destroy_qp = arbel_destroy_qp, + .post_send = arbel_post_send, + .post_recv = arbel_post_recv, + .poll_cq = arbel_poll_cq, + .mcast_attach = arbel_mcast_attach, + .mcast_detach = arbel_mcast_detach, +}; + +/*************************************************************************** + * + * MAD IFC operations + * + *************************************************************************** + */ + +static int arbel_mad_ifc ( struct arbel *arbel, + union arbelprm_mad *mad ) { + struct ib_mad_hdr *hdr = &mad->mad.mad_hdr; + int rc; + + hdr->base_version = IB_MGMT_BASE_VERSION; + if ( ( rc = arbel_cmd_mad_ifc ( arbel, mad ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not issue MAD IFC: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + if ( hdr->status != 0 ) { + DBGC ( arbel, "Arbel %p MAD IFC status %04x\n", + arbel, ntohs ( hdr->status ) ); + return -EIO; + } + return 0; +} + +static int arbel_get_port_info ( struct arbel *arbel, + struct ib_mad_port_info *port_info ) { + union arbelprm_mad mad; + struct ib_mad_hdr *hdr = &mad.mad.mad_hdr; + int rc; + + memset ( &mad, 0, sizeof ( mad ) ); + hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + hdr->class_version = 1; + hdr->method = IB_MGMT_METHOD_GET; + hdr->attr_id = htons ( IB_SMP_ATTR_PORT_INFO ); + hdr->attr_mod = htonl ( PXE_IB_PORT ); + if ( ( rc = arbel_mad_ifc ( arbel, &mad ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not get port info: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + memcpy ( port_info, &mad.mad.port_info, sizeof ( *port_info ) ); + return 0; +} + +static int arbel_get_guid_info ( struct arbel *arbel, + struct ib_mad_guid_info *guid_info ) { + union arbelprm_mad mad; + struct ib_mad_hdr *hdr = &mad.mad.mad_hdr; + int rc; + + memset ( &mad, 0, sizeof ( mad ) ); + hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + hdr->class_version = 1; + hdr->method = IB_MGMT_METHOD_GET; + hdr->attr_id = htons ( IB_SMP_ATTR_GUID_INFO ); + if ( ( rc = arbel_mad_ifc ( arbel, &mad ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not get GUID 
info: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + memcpy ( guid_info, &mad.mad.guid_info, sizeof ( *guid_info ) ); + return 0; +} + +static int arbel_get_pkey_table ( struct arbel *arbel, + struct ib_mad_pkey_table *pkey_table ) { + union arbelprm_mad mad; + struct ib_mad_hdr *hdr = &mad.mad.mad_hdr; + int rc; + + memset ( &mad, 0, sizeof ( mad ) ); + hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; + hdr->class_version = 1; + hdr->method = IB_MGMT_METHOD_GET; + hdr->attr_id = htons ( IB_SMP_ATTR_PKEY_TABLE ); + if ( ( rc = arbel_mad_ifc ( arbel, &mad ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not get pkey table: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + memcpy ( pkey_table, &mad.mad.pkey_table, sizeof ( *pkey_table ) ); + return 0; +} + +static int arbel_get_port_gid ( struct arbel *arbel, + struct ib_gid *port_gid ) { + union { + /* This union exists just to save stack space */ + struct ib_mad_port_info port_info; + struct ib_mad_guid_info guid_info; + } u; + int rc; + + /* Port info gives us the first half of the port GID */ + if ( ( rc = arbel_get_port_info ( arbel, &u.port_info ) ) != 0 ) + return rc; + memcpy ( &port_gid->u.bytes[0], u.port_info.gid_prefix, 8 ); + + /* GUID info gives us the second half of the port GID */ + if ( ( rc = arbel_get_guid_info ( arbel, &u.guid_info ) ) != 0 ) + return rc; + memcpy ( &port_gid->u.bytes[8], u.guid_info.gid_local, 8 ); + + return 0; +} + +static int arbel_get_sm_lid ( struct arbel *arbel, + unsigned long *sm_lid ) { + struct ib_mad_port_info port_info; + int rc; + + if ( ( rc = arbel_get_port_info ( arbel, &port_info ) ) != 0 ) + return rc; + *sm_lid = ntohs ( port_info.mastersm_lid ); + return 0; +} + +static int arbel_get_pkey ( struct arbel *arbel, unsigned int *pkey ) { + struct ib_mad_pkey_table pkey_table; + int rc; + + if ( ( rc = arbel_get_pkey_table ( arbel, &pkey_table ) ) != 0 ) + return rc; + *pkey = ntohs ( pkey_table.pkey[0][0] ); + return 0; +} + +/** + * Wait for link up + * + * @v arbel Arbel device + * @ret rc Return status code + * + * This function shouldn't really exist. Unfortunately, IB links take + * a long time to come up, and we can't get various key parameters + * (e.g. our own IPoIB MAC address) without information from the subnet + * manager. We should eventually make link-up an asynchronous event. + */ +static int arbel_wait_for_link ( struct arbel *arbel ) { + struct ib_mad_port_info port_info; + unsigned int retries; + int rc; + + printf ( "Waiting for Infiniband link-up..." ); + for ( retries = 20 ; retries ; retries-- ) { + if ( ( rc = arbel_get_port_info ( arbel, &port_info ) ) != 0 ) + continue; + if ( ( ( port_info.port_state__link_speed_supported ) & 0xf ) + == 4 ) { + printf ( "ok\n" ); + return 0; + } + printf ( "." 
); + sleep ( 1 ); + } + printf ( "failed\n" ); + return -ENODEV; +}; + +/** + * Get MAD parameters + * + * @v arbel Arbel device + * @ret rc Return status code + */ +static int arbel_get_mad_params ( struct ib_device *ibdev ) { + struct arbel *arbel = ibdev->dev_priv; + int rc; + + /* Get subnet manager LID */ + if ( ( rc = arbel_get_sm_lid ( arbel, &ibdev->sm_lid ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not determine subnet manager " + "LID: %s\n", arbel, strerror ( rc ) ); + return rc; + } + + /* Get port GID */ + if ( ( rc = arbel_get_port_gid ( arbel, &ibdev->port_gid ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not determine port GID: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + + /* Get partition key */ + if ( ( rc = arbel_get_pkey ( arbel, &ibdev->pkey ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not determine partition key: " + "%s\n", arbel, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/*************************************************************************** + * + * Firmware control + * + *************************************************************************** + */ + +/** + * Start firmware running + * + * @v arbel Arbel device + * @ret rc Return status code + */ +static int arbel_start_firmware ( struct arbel *arbel ) { + struct arbelprm_query_fw fw; + struct arbelprm_access_lam lam; + struct arbelprm_virtual_physical_mapping map_fa; + unsigned int fw_pages; + unsigned int log2_fw_pages; + size_t fw_size; + physaddr_t fw_base; + int rc; + + /* Get firmware parameters */ + if ( ( rc = arbel_cmd_query_fw ( arbel, &fw ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not query firmware: %s\n", + arbel, strerror ( rc ) ); + goto err_query_fw; + } + DBGC ( arbel, "Arbel %p firmware version %ld.%ld.%ld\n", arbel, + MLX_GET ( &fw, fw_rev_major ), MLX_GET ( &fw, fw_rev_minor ), + MLX_GET ( &fw, fw_rev_subminor ) ); + fw_pages = MLX_GET ( &fw, fw_pages ); + log2_fw_pages = fls ( fw_pages - 1 ); + fw_pages = ( 1 << log2_fw_pages ); + DBGC ( arbel, "Arbel %p requires %d kB for firmware\n", + arbel, ( fw_pages * 4 ) ); + + /* Enable locally-attached memory. Ignore failure; there may + * be no attached memory. + */ + arbel_cmd_enable_lam ( arbel, &lam ); + + /* Allocate firmware pages and map firmware area */ + fw_size = ( fw_pages * 4096 ); + arbel->firmware_area = umalloc ( fw_size ); + if ( ! 
arbel->firmware_area ) { + rc = -ENOMEM; + goto err_alloc_fa; + } + fw_base = ( user_to_phys ( arbel->firmware_area, fw_size ) & + ~( fw_size - 1 ) ); + DBGC ( arbel, "Arbel %p firmware area at physical [%lx,%lx)\n", + arbel, fw_base, ( fw_base + fw_size ) ); + memset ( &map_fa, 0, sizeof ( map_fa ) ); + MLX_FILL_2 ( &map_fa, 3, + log2size, log2_fw_pages, + pa_l, ( fw_base >> 12 ) ); + if ( ( rc = arbel_cmd_map_fa ( arbel, &map_fa ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not map firmware: %s\n", + arbel, strerror ( rc ) ); + goto err_map_fa; + } + + /* Start firmware */ + if ( ( rc = arbel_cmd_run_fw ( arbel ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not run firmware: %s\n", + arbel, strerror ( rc ) ); + goto err_run_fw; + } + + DBGC ( arbel, "Arbel %p firmware started\n", arbel ); + return 0; + + err_run_fw: + arbel_cmd_unmap_fa ( arbel ); + err_map_fa: + ufree ( arbel->firmware_area ); + arbel->firmware_area = UNULL; + err_alloc_fa: + err_query_fw: + return rc; +} + +/** + * Stop firmware running + * + * @v arbel Arbel device + */ +static void arbel_stop_firmware ( struct arbel *arbel ) { + int rc; + + if ( ( rc = arbel_cmd_unmap_fa ( arbel ) ) != 0 ) { + DBGC ( arbel, "Arbel %p FATAL could not stop firmware: %s\n", + arbel, strerror ( rc ) ); + /* Leak memory and return; at least we avoid corruption */ + return; + } + ufree ( arbel->firmware_area ); + arbel->firmware_area = UNULL; +} + +/*************************************************************************** + * + * Infinihost Context Memory management + * + *************************************************************************** + */ + +/** + * Get device limits + * + * @v arbel Arbel device + * @ret rc Return status code + */ +static int arbel_get_limits ( struct arbel *arbel ) { + struct arbelprm_query_dev_lim dev_lim; + int rc; + + if ( ( rc = arbel_cmd_query_dev_lim ( arbel, &dev_lim ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not get device limits: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + + arbel->limits.reserved_qps = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_qps ) ); + arbel->limits.qpc_entry_size = MLX_GET ( &dev_lim, qpc_entry_sz ); + arbel->limits.eqpc_entry_size = MLX_GET ( &dev_lim, eqpc_entry_sz ); + arbel->limits.reserved_srqs = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_srqs ) ); + arbel->limits.srqc_entry_size = MLX_GET ( &dev_lim, srq_entry_sz ); + arbel->limits.reserved_ees = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_ees ) ); + arbel->limits.eec_entry_size = MLX_GET ( &dev_lim, eec_entry_sz ); + arbel->limits.eeec_entry_size = MLX_GET ( &dev_lim, eeec_entry_sz ); + arbel->limits.reserved_cqs = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_cqs ) ); + arbel->limits.cqc_entry_size = MLX_GET ( &dev_lim, cqc_entry_sz ); + arbel->limits.reserved_mtts = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_mtts ) ); + arbel->limits.mtt_entry_size = MLX_GET ( &dev_lim, mtt_entry_sz ); + arbel->limits.reserved_mrws = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_mrws ) ); + arbel->limits.mpt_entry_size = MLX_GET ( &dev_lim, mpt_entry_sz ); + arbel->limits.reserved_rdbs = + ( 1 << MLX_GET ( &dev_lim, log2_rsvd_rdbs ) ); + arbel->limits.eqc_entry_size = MLX_GET ( &dev_lim, eqc_entry_sz ); + arbel->limits.reserved_uars = MLX_GET ( &dev_lim, num_rsvd_uars ); + + return 0; +} + +/** + * Get ICM usage + * + * @v log_num_entries Log2 of the number of entries + * @v entry_size Entry size + * @ret usage Usage size in ICM + */ +static size_t icm_usage ( unsigned int log_num_entries, size_t entry_size ) { + size_t usage; + + usage = ( ( 1 << 
log_num_entries ) * entry_size ); + usage = ( ( usage + 4095 ) & ~4095 ); + return usage; +} + +/** + * Allocate ICM + * + * @v arbel Arbel device + * @v init_hca INIT_HCA structure to fill in + * @ret rc Return status code + */ +static int arbel_alloc_icm ( struct arbel *arbel, + struct arbelprm_init_hca *init_hca ) { + struct arbelprm_scalar_parameter icm_size; + struct arbelprm_scalar_parameter icm_aux_size; + struct arbelprm_virtual_physical_mapping map_icm_aux; + struct arbelprm_virtual_physical_mapping map_icm; + size_t icm_offset = 0; + unsigned int log_num_qps, log_num_srqs, log_num_ees, log_num_cqs; + unsigned int log_num_mtts, log_num_mpts, log_num_rdbs, log_num_eqs; + int rc; + + icm_offset = ( ( arbel->limits.reserved_uars + 1 ) << 12 ); + + /* Queue pair contexts */ + log_num_qps = fls ( arbel->limits.reserved_qps + ARBEL_MAX_QPS - 1 ); + MLX_FILL_2 ( init_hca, 13, + qpc_eec_cqc_eqc_rdb_parameters.qpc_base_addr_l, + ( icm_offset >> 7 ), + qpc_eec_cqc_eqc_rdb_parameters.log_num_of_qp, + log_num_qps ); + DBGC ( arbel, "Arbel %p ICM QPC base = %zx\n", arbel, icm_offset ); + icm_offset += icm_usage ( log_num_qps, arbel->limits.qpc_entry_size ); + + /* Extended queue pair contexts */ + MLX_FILL_1 ( init_hca, 25, + qpc_eec_cqc_eqc_rdb_parameters.eqpc_base_addr_l, + icm_offset ); + DBGC ( arbel, "Arbel %p ICM EQPC base = %zx\n", arbel, icm_offset ); + // icm_offset += icm_usage ( log_num_qps, arbel->limits.eqpc_entry_size ); + icm_offset += icm_usage ( log_num_qps, arbel->limits.qpc_entry_size ); + + /* Shared receive queue contexts */ + log_num_srqs = fls ( arbel->limits.reserved_srqs - 1 ); + MLX_FILL_2 ( init_hca, 19, + qpc_eec_cqc_eqc_rdb_parameters.srqc_base_addr_l, + ( icm_offset >> 5 ), + qpc_eec_cqc_eqc_rdb_parameters.log_num_of_srq, + log_num_srqs ); + DBGC ( arbel, "Arbel %p ICM SRQC base = %zx\n", arbel, icm_offset ); + icm_offset += icm_usage ( log_num_srqs, arbel->limits.srqc_entry_size ); + + /* End-to-end contexts */ + log_num_ees = fls ( arbel->limits.reserved_ees - 1 ); + MLX_FILL_2 ( init_hca, 17, + qpc_eec_cqc_eqc_rdb_parameters.eec_base_addr_l, + ( icm_offset >> 7 ), + qpc_eec_cqc_eqc_rdb_parameters.log_num_of_ee, + log_num_ees ); + DBGC ( arbel, "Arbel %p ICM EEC base = %zx\n", arbel, icm_offset ); + icm_offset += icm_usage ( log_num_ees, arbel->limits.eec_entry_size ); + + /* Extended end-to-end contexts */ + MLX_FILL_1 ( init_hca, 29, + qpc_eec_cqc_eqc_rdb_parameters.eeec_base_addr_l, + icm_offset ); + DBGC ( arbel, "Arbel %p ICM EEEC base = %zx\n", arbel, icm_offset ); + icm_offset += icm_usage ( log_num_ees, arbel->limits.eeec_entry_size ); + + /* Completion queue contexts */ + log_num_cqs = fls ( arbel->limits.reserved_cqs + ARBEL_MAX_CQS - 1 ); + MLX_FILL_2 ( init_hca, 21, + qpc_eec_cqc_eqc_rdb_parameters.cqc_base_addr_l, + ( icm_offset >> 6 ), + qpc_eec_cqc_eqc_rdb_parameters.log_num_of_cq, + log_num_cqs ); + DBGC ( arbel, "Arbel %p ICM CQC base = %zx\n", arbel, icm_offset ); + icm_offset += icm_usage ( log_num_cqs, arbel->limits.cqc_entry_size ); + + /* Memory translation table */ + log_num_mtts = fls ( arbel->limits.reserved_mtts - 1 ); + MLX_FILL_1 ( init_hca, 65, + tpt_parameters.mtt_base_addr_l, icm_offset ); + DBGC ( arbel, "Arbel %p ICM MTT base = %zx\n", arbel, icm_offset ); + icm_offset += icm_usage ( log_num_mtts, arbel->limits.mtt_entry_size ); + + /* Memory protection table */ + log_num_mpts = fls ( arbel->limits.reserved_mrws + 1 - 1 ); + MLX_FILL_1 ( init_hca, 61, + tpt_parameters.mpt_base_adr_l, icm_offset ); + MLX_FILL_1 ( init_hca, 62, + 
tpt_parameters.log_mpt_sz, log_num_mpts ); + DBGC ( arbel, "Arbel %p ICM MPT base = %zx\n", arbel, icm_offset ); + icm_offset += icm_usage ( log_num_mpts, arbel->limits.mpt_entry_size ); + + /* RDMA something or other */ + log_num_rdbs = fls ( arbel->limits.reserved_rdbs - 1 ); + MLX_FILL_1 ( init_hca, 37, + qpc_eec_cqc_eqc_rdb_parameters.rdb_base_addr_l, + icm_offset ); + DBGC ( arbel, "Arbel %p ICM RDB base = %zx\n", arbel, icm_offset ); + icm_offset += icm_usage ( log_num_rdbs, 32 ); + + /* Event queue contexts */ + log_num_eqs = 6; + MLX_FILL_2 ( init_hca, 33, + qpc_eec_cqc_eqc_rdb_parameters.eqc_base_addr_l, + ( icm_offset >> 6 ), + qpc_eec_cqc_eqc_rdb_parameters.log_num_eq, + log_num_eqs ); + DBGC ( arbel, "Arbel %p ICM EQ base = %zx\n", arbel, icm_offset ); + icm_offset += ( ( 1 << log_num_eqs ) * arbel->limits.eqc_entry_size ); + + /* Multicast table */ + MLX_FILL_1 ( init_hca, 49, + multicast_parameters.mc_base_addr_l, icm_offset ); + MLX_FILL_1 ( init_hca, 52, + multicast_parameters.log_mc_table_entry_sz, + fls ( sizeof ( struct arbelprm_mgm_entry ) - 1 ) ); + MLX_FILL_1 ( init_hca, 53, + multicast_parameters.mc_table_hash_sz, 8 ); + MLX_FILL_1 ( init_hca, 54, + multicast_parameters.log_mc_table_sz, 3 ); + DBGC ( arbel, "Arbel %p ICM MC base = %zx\n", arbel, icm_offset ); + icm_offset += ( 8 * sizeof ( struct arbelprm_mgm_entry ) ); + + arbel->icm_len = icm_offset; + arbel->icm_len = ( ( arbel->icm_len + 4095 ) & ~4095 ); + + /* Get ICM auxiliary area size */ + memset ( &icm_size, 0, sizeof ( icm_size ) ); + MLX_FILL_1 ( &icm_size, 1, value, arbel->icm_len ); + if ( ( rc = arbel_cmd_set_icm_size ( arbel, &icm_size, + &icm_aux_size ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not set ICM size: %s\n", + arbel, strerror ( rc ) ); + goto err_set_icm_size; + } + arbel->icm_aux_len = ( MLX_GET ( &icm_aux_size, value ) * 4096 ); + + /* Allocate ICM data and auxiliary area */ + DBGC ( arbel, "Arbel %p requires %zd kB ICM and %zd kB AUX ICM\n", + arbel, ( arbel->icm_len / 1024 ), + ( arbel->icm_aux_len / 1024 ) ); + arbel->icm = umalloc ( arbel->icm_len + arbel->icm_aux_len ); + if ( ! 
arbel->icm ) { + rc = -ENOMEM; + goto err_alloc; + } + + /* Map ICM auxiliary area */ + memset ( &map_icm_aux, 0, sizeof ( map_icm_aux ) ); + MLX_FILL_2 ( &map_icm_aux, 3, + log2size, fls ( ( arbel->icm_aux_len / 4096 ) - 1 ), + pa_l, + ( user_to_phys ( arbel->icm, arbel->icm_len ) >> 12 ) ); + if ( ( rc = arbel_cmd_map_icm_aux ( arbel, &map_icm_aux ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not map AUX ICM: %s\n", + arbel, strerror ( rc ) ); + goto err_map_icm_aux; + } + + /* MAP ICM area */ + memset ( &map_icm, 0, sizeof ( map_icm ) ); + MLX_FILL_2 ( &map_icm, 3, + log2size, fls ( ( arbel->icm_len / 4096 ) - 1 ), + pa_l, ( user_to_phys ( arbel->icm, 0 ) >> 12 ) ); + if ( ( rc = arbel_cmd_map_icm ( arbel, &map_icm ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not map ICM: %s\n", + arbel, strerror ( rc ) ); + goto err_map_icm; + } + + return 0; + + arbel_cmd_unmap_icm ( arbel, ( arbel->icm_len / 4096 ) ); + err_map_icm: + arbel_cmd_unmap_icm_aux ( arbel ); + err_map_icm_aux: + ufree ( arbel->icm ); + arbel->icm = UNULL; + err_alloc: + err_set_icm_size: + return rc; +} + +/** + * Free ICM + * + * @v arbel Arbel device + */ +static void arbel_free_icm ( struct arbel *arbel ) { + arbel_cmd_unmap_icm ( arbel, ( arbel->icm_len / 4096 ) ); + arbel_cmd_unmap_icm_aux ( arbel ); + ufree ( arbel->icm ); + arbel->icm = UNULL; +} + +/*************************************************************************** + * + * Infiniband link-layer operations + * + *************************************************************************** + */ + +/** + * Initialise Infiniband link + * + * @v arbel Arbel device + * @ret rc Return status code + */ +static int arbel_init_ib ( struct arbel *arbel ) { + struct arbelprm_init_ib init_ib; + int rc; + + memset ( &init_ib, 0, sizeof ( init_ib ) ); + MLX_FILL_3 ( &init_ib, 0, + mtu_cap, ARBEL_MTU_2048, + port_width_cap, 3, + vl_cap, 1 ); + MLX_FILL_1 ( &init_ib, 1, max_gid, 1 ); + MLX_FILL_1 ( &init_ib, 2, max_pkey, 64 ); + if ( ( rc = arbel_cmd_init_ib ( arbel, PXE_IB_PORT, + &init_ib ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not initialise IB: %s\n", + arbel, strerror ( rc ) ); + return rc; + } + + return 0; +} + +/** + * Close Infiniband link + * + * @v arbel Arbel device + */ +static void arbel_close_ib ( struct arbel *arbel ) { + int rc; + + if ( ( rc = arbel_cmd_close_ib ( arbel, PXE_IB_PORT ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not close IB: %s\n", + arbel, strerror ( rc ) ); + /* Nothing we can do about this */ + } +} + +/*************************************************************************** + * + * PCI interface + * + *************************************************************************** + */ + +/** + * Set up memory protection table + * + * @v arbel Arbel device + * @ret rc Return status code + */ +static int arbel_setup_mpt ( struct arbel *arbel ) { + struct arbelprm_mpt mpt; + uint32_t key; + int rc; + + /* Derive key */ + key = ( arbel->limits.reserved_mrws | ARBEL_MKEY_PREFIX ); + arbel->reserved_lkey = ( ( key << 8 ) | ( key >> 24 ) ); + + /* Initialise memory protection table */ + memset ( &mpt, 0, sizeof ( mpt ) ); + MLX_FILL_4 ( &mpt, 0, + r_w, 1, + pa, 1, + lr, 1, + lw, 1 ); + MLX_FILL_1 ( &mpt, 2, mem_key, key ); + MLX_FILL_1 ( &mpt, 3, pd, ARBEL_GLOBAL_PD ); + MLX_FILL_1 ( &mpt, 6, reg_wnd_len_h, 0xffffffffUL ); + MLX_FILL_1 ( &mpt, 7, reg_wnd_len_l, 0xffffffffUL ); + if ( ( rc = arbel_cmd_sw2hw_mpt ( arbel, arbel->limits.reserved_mrws, + &mpt ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not set up MPT: %s\n", + arbel, strerror ( 
rc ) ); + return rc; + } + + return 0; +} + +/** + * Probe PCI device + * + * @v pci PCI device + * @v id PCI ID + * @ret rc Return status code + */ +static int arbel_probe ( struct pci_device *pci, + const struct pci_device_id *id __unused ) { + struct ib_device *ibdev; + struct arbel *arbel; + struct arbelprm_init_hca init_hca; + int rc; + + /* Allocate Infiniband device */ + ibdev = alloc_ibdev ( sizeof ( *arbel ) ); + if ( ! ibdev ) { + rc = -ENOMEM; + goto err_ibdev; + } + ibdev->op = &arbel_ib_operations; + pci_set_drvdata ( pci, ibdev ); + ibdev->dev = &pci->dev; + arbel = ibdev->dev_priv; + memset ( arbel, 0, sizeof ( *arbel ) ); + + /* Fix up PCI device */ + adjust_pci_device ( pci ); + + /* Get PCI BARs */ + arbel->config = ioremap ( pci_bar_start ( pci, ARBEL_PCI_CONFIG_BAR ), + ARBEL_PCI_CONFIG_BAR_SIZE ); + arbel->uar = ioremap ( ( pci_bar_start ( pci, ARBEL_PCI_UAR_BAR ) + + ARBEL_PCI_UAR_IDX * ARBEL_PCI_UAR_SIZE ), + ARBEL_PCI_UAR_SIZE ); + + /* Allocate space for mailboxes */ + arbel->mailbox_in = malloc_dma ( ARBEL_MBOX_SIZE, ARBEL_MBOX_ALIGN ); + if ( ! arbel->mailbox_in ) { + rc = -ENOMEM; + goto err_mailbox_in; + } + arbel->mailbox_out = malloc_dma ( ARBEL_MBOX_SIZE, ARBEL_MBOX_ALIGN ); + if ( ! arbel->mailbox_out ) { + rc = -ENOMEM; + goto err_mailbox_out; + } + + /* Start firmware */ + if ( ( rc = arbel_start_firmware ( arbel ) ) != 0 ) + goto err_start_firmware; + + /* Get device limits */ + if ( ( rc = arbel_get_limits ( arbel ) ) != 0 ) + goto err_get_limits; + + /* Allocate ICM */ + memset ( &init_hca, 0, sizeof ( init_hca ) ); + if ( ( rc = arbel_alloc_icm ( arbel, &init_hca ) ) != 0 ) + goto err_alloc_icm; + + + unsigned long uar_offset = ( arbel->limits.reserved_uars * 4096 ); + arbel->db_rec = phys_to_virt ( user_to_phys ( arbel->icm, + uar_offset ) ); + memset ( arbel->db_rec, 0, 4096 ); + union arbelprm_doorbell_record *db_rec; + db_rec = &arbel->db_rec[ARBEL_GROUP_SEPARATOR_DOORBELL]; + MLX_FILL_1 ( &db_rec->qp, 1, res, ARBEL_UAR_RES_GROUP_SEP ); + + + /* Initialise HCA */ + MLX_FILL_1 ( &init_hca, 74, uar_parameters.log_max_uars, 1 ); + if ( ( rc = arbel_cmd_init_hca ( arbel, &init_hca ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not initialise HCA: %s\n", + arbel, strerror ( rc ) ); + goto err_init_hca; + } + + /* Set up memory protection */ + if ( ( rc = arbel_setup_mpt ( arbel ) ) != 0 ) + goto err_setup_mpt; + + /* Bring up IB layer */ + if ( ( rc = arbel_init_ib ( arbel ) ) != 0 ) + goto err_init_ib; + + /* Wait for link */ + if ( ( rc = arbel_wait_for_link ( arbel ) ) != 0 ) + goto err_wait_for_link; + + /* Get MAD parameters */ + if ( ( rc = arbel_get_mad_params ( ibdev ) ) != 0 ) + goto err_get_mad_params; + + DBGC ( arbel, "Arbel %p port GID is %08lx:%08lx:%08lx:%08lx\n", arbel, + htonl ( ibdev->port_gid.u.dwords[0] ), + htonl ( ibdev->port_gid.u.dwords[1] ), + htonl ( ibdev->port_gid.u.dwords[2] ), + htonl ( ibdev->port_gid.u.dwords[3] ) ); + + /* Add IPoIB device */ + if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) { + DBGC ( arbel, "Arbel %p could not add IPoIB device: %s\n", + arbel, strerror ( rc ) ); + goto err_ipoib_probe; + } + + return 0; + + err_ipoib_probe: + err_get_mad_params: + err_wait_for_link: + arbel_close_ib ( arbel ); + err_init_ib: + err_setup_mpt: + arbel_cmd_close_hca ( arbel ); + err_init_hca: + arbel_free_icm ( arbel ); + err_alloc_icm: + err_get_limits: + arbel_stop_firmware ( arbel ); + err_start_firmware: + free_dma ( arbel->mailbox_out, ARBEL_MBOX_SIZE ); + err_mailbox_out: + free_dma ( arbel->mailbox_in, ARBEL_MBOX_SIZE ); 
+ err_mailbox_in: + free_ibdev ( ibdev ); + err_ibdev: + return rc; +} + +/** + * Remove PCI device + * + * @v pci PCI device + */ +static void arbel_remove ( struct pci_device *pci ) { + struct ib_device *ibdev = pci_get_drvdata ( pci ); + struct arbel *arbel = ibdev->dev_priv; + + ipoib_remove ( ibdev ); + arbel_close_ib ( arbel ); + arbel_cmd_close_hca ( arbel ); + arbel_free_icm ( arbel ); + arbel_stop_firmware ( arbel ); + free_dma ( arbel->mailbox_out, ARBEL_MBOX_SIZE ); + free_dma ( arbel->mailbox_in, ARBEL_MBOX_SIZE ); + free_ibdev ( ibdev ); +} + +static struct pci_device_id arbel_nics[] = { + PCI_ROM ( 0x15b3, 0x6282, "MT25218", "MT25218 HCA driver" ), + PCI_ROM ( 0x15b3, 0x6274, "MT25204", "MT25204 HCA driver" ), +}; + +struct pci_driver arbel_driver __pci_driver = { + .ids = arbel_nics, + .id_count = ( sizeof ( arbel_nics ) / sizeof ( arbel_nics[0] ) ), + .probe = arbel_probe, + .remove = arbel_remove, +}; diff --git a/src/drivers/infiniband/arbel.h b/src/drivers/infiniband/arbel.h new file mode 100644 index 00000000..4d2c740f --- /dev/null +++ b/src/drivers/infiniband/arbel.h @@ -0,0 +1,459 @@ +#ifndef _ARBEL_H +#define _ARBEL_H + +/** @file + * + * Mellanox Arbel Infiniband HCA driver + * + */ + +#include +#include +#include "mlx_bitops.h" +#include "MT25218_PRM.h" + +/* + * Hardware constants + * + */ + +/* PCI BARs */ +#define ARBEL_PCI_CONFIG_BAR PCI_BASE_ADDRESS_0 +#define ARBEL_PCI_CONFIG_BAR_SIZE 0x100000 +#define ARBEL_PCI_UAR_BAR PCI_BASE_ADDRESS_2 +#define ARBEL_PCI_UAR_IDX 1 +#define ARBEL_PCI_UAR_SIZE 0x1000 + +/* UAR context table (UCE) resource types */ +#define ARBEL_UAR_RES_NONE 0x00 +#define ARBEL_UAR_RES_CQ_CI 0x01 +#define ARBEL_UAR_RES_CQ_ARM 0x02 +#define ARBEL_UAR_RES_SQ 0x03 +#define ARBEL_UAR_RES_RQ 0x04 +#define ARBEL_UAR_RES_GROUP_SEP 0x07 + +/* Work queue entry and completion queue entry opcodes */ +#define ARBEL_OPCODE_SEND 0x0a +#define ARBEL_OPCODE_RECV_ERROR 0xfe +#define ARBEL_OPCODE_SEND_ERROR 0xff + +/* HCA command register opcodes */ +#define ARBEL_HCR_QUERY_DEV_LIM 0x0003 +#define ARBEL_HCR_QUERY_FW 0x0004 +#define ARBEL_HCR_INIT_HCA 0x0007 +#define ARBEL_HCR_CLOSE_HCA 0x0008 +#define ARBEL_HCR_INIT_IB 0x0009 +#define ARBEL_HCR_CLOSE_IB 0x000a +#define ARBEL_HCR_SW2HW_MPT 0x000d +#define ARBEL_HCR_MAP_EQ 0x0012 +#define ARBEL_HCR_SW2HW_EQ 0x0013 +#define ARBEL_HCR_HW2SW_EQ 0x0014 +#define ARBEL_HCR_SW2HW_CQ 0x0016 +#define ARBEL_HCR_HW2SW_CQ 0x0017 +#define ARBEL_HCR_RST2INIT_QPEE 0x0019 +#define ARBEL_HCR_INIT2RTR_QPEE 0x001a +#define ARBEL_HCR_RTR2RTS_QPEE 0x001b +#define ARBEL_HCR_2RST_QPEE 0x0021 +#define ARBEL_HCR_MAD_IFC 0x0024 +#define ARBEL_HCR_READ_MGM 0x0025 +#define ARBEL_HCR_WRITE_MGM 0x0026 +#define ARBEL_HCR_MGID_HASH 0x0027 +#define ARBEL_HCR_RUN_FW 0x0ff6 +#define ARBEL_HCR_DISABLE_LAM 0x0ff7 +#define ARBEL_HCR_ENABLE_LAM 0x0ff8 +#define ARBEL_HCR_UNMAP_ICM 0x0ff9 +#define ARBEL_HCR_MAP_ICM 0x0ffa +#define ARBEL_HCR_UNMAP_ICM_AUX 0x0ffb +#define ARBEL_HCR_MAP_ICM_AUX 0x0ffc +#define ARBEL_HCR_SET_ICM_SIZE 0x0ffd +#define ARBEL_HCR_UNMAP_FA 0x0ffe +#define ARBEL_HCR_MAP_FA 0x0fff + +/* Service types */ +#define ARBEL_ST_UD 0x03 + +/* MTUs */ +#define ARBEL_MTU_2048 0x04 + +#define ARBEL_NO_EQ 64 + +#define ARBEL_INVALID_LKEY 0x00000100UL + +#define ARBEL_DB_POST_SND_OFFSET 0x10 + +/* + * Datatypes that seem to be missing from the autogenerated documentation + * + */ +struct arbelprm_mgm_hash_st { + pseudo_bit_t reserved0[0x00020]; +/* -------------- */ + pseudo_bit_t hash[0x00010]; + 
pseudo_bit_t reserved1[0x00010]; +} __attribute__ (( packed )); + +struct arbelprm_scalar_parameter_st { + pseudo_bit_t reserved0[0x00020]; +/* -------------- */ + pseudo_bit_t value[0x00020]; +} __attribute__ (( packed )); + +/* + * Wrapper structures for hardware datatypes + * + */ + +struct MLX_DECLARE_STRUCT ( arbelprm_access_lam ); +struct MLX_DECLARE_STRUCT ( arbelprm_completion_queue_context ); +struct MLX_DECLARE_STRUCT ( arbelprm_completion_queue_entry ); +struct MLX_DECLARE_STRUCT ( arbelprm_completion_with_error ); +struct MLX_DECLARE_STRUCT ( arbelprm_cq_arm_db_record ); +struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record ); +struct MLX_DECLARE_STRUCT ( arbelprm_eqc ); +struct MLX_DECLARE_STRUCT ( arbelprm_hca_command_register ); +struct MLX_DECLARE_STRUCT ( arbelprm_init_hca ); +struct MLX_DECLARE_STRUCT ( arbelprm_init_ib ); +struct MLX_DECLARE_STRUCT ( arbelprm_mad_ifc ); +struct MLX_DECLARE_STRUCT ( arbelprm_mgm_entry ); +struct MLX_DECLARE_STRUCT ( arbelprm_mgm_hash ); +struct MLX_DECLARE_STRUCT ( arbelprm_mpt ); +struct MLX_DECLARE_STRUCT ( arbelprm_qp_db_record ); +struct MLX_DECLARE_STRUCT ( arbelprm_qp_ee_state_transitions ); +struct MLX_DECLARE_STRUCT ( arbelprm_query_dev_lim ); +struct MLX_DECLARE_STRUCT ( arbelprm_query_fw ); +struct MLX_DECLARE_STRUCT ( arbelprm_queue_pair_ee_context_entry ); +struct MLX_DECLARE_STRUCT ( arbelprm_recv_wqe_segment_next ); +struct MLX_DECLARE_STRUCT ( arbelprm_scalar_parameter ); +struct MLX_DECLARE_STRUCT ( arbelprm_send_doorbell ); +struct MLX_DECLARE_STRUCT ( arbelprm_ud_address_vector ); +struct MLX_DECLARE_STRUCT ( arbelprm_virtual_physical_mapping ); +struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_ctrl_send ); +struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_data_ptr ); +struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_next ); +struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_ud ); + +/* + * Composite hardware datatypes + * + */ + +#define ARBEL_MAX_GATHER 1 + +struct arbelprm_ud_send_wqe { + struct arbelprm_wqe_segment_next next; + struct arbelprm_wqe_segment_ctrl_send ctrl; + struct arbelprm_wqe_segment_ud ud; + struct arbelprm_wqe_segment_data_ptr data[ARBEL_MAX_GATHER]; +} __attribute__ (( packed )); + +#define ARBEL_MAX_SCATTER 1 + +struct arbelprm_recv_wqe { + /* The autogenerated header is inconsistent between send and + * receive WQEs. The "ctrl" structure for receive WQEs is + * defined to include the "next" structure. Since the "ctrl" + * part of the "ctrl" structure contains only "reserved, must + * be zero" bits, we ignore its definition and provide + * something more usable. 
+ */ + struct arbelprm_recv_wqe_segment_next next; + uint32_t ctrl[2]; /* All "reserved, must be zero" */ + struct arbelprm_wqe_segment_data_ptr data[ARBEL_MAX_SCATTER]; +} __attribute__ (( packed )); + +union arbelprm_completion_entry { + struct arbelprm_completion_queue_entry normal; + struct arbelprm_completion_with_error error; +} __attribute__ (( packed )); + +union arbelprm_doorbell_record { + struct arbelprm_cq_arm_db_record cq_arm; + struct arbelprm_cq_ci_db_record cq_ci; + struct arbelprm_qp_db_record qp; +} __attribute__ (( packed )); + +union arbelprm_doorbell_register { + struct arbelprm_send_doorbell send; + uint32_t dword[2]; +} __attribute__ (( packed )); + +union arbelprm_mad { + struct arbelprm_mad_ifc ifc; + union ib_mad mad; +} __attribute__ (( packed )); + +/* + * gPXE-specific definitions + * + */ + +/** Arbel device limits */ +struct arbel_dev_limits { + /** Number of reserved QPs */ + unsigned int reserved_qps; + /** QP context entry size */ + size_t qpc_entry_size; + /** Extended QP context entry size */ + size_t eqpc_entry_size; + /** Number of reserved SRQs */ + unsigned int reserved_srqs; + /** SRQ context entry size */ + size_t srqc_entry_size; + /** Number of reserved EEs */ + unsigned int reserved_ees; + /** EE context entry size */ + size_t eec_entry_size; + /** Extended EE context entry size */ + size_t eeec_entry_size; + /** Number of reserved CQs */ + unsigned int reserved_cqs; + /** CQ context entry size */ + size_t cqc_entry_size; + /** Number of reserved MTTs */ + unsigned int reserved_mtts; + /** MTT entry size */ + size_t mtt_entry_size; + /** Number of reserved MRWs */ + unsigned int reserved_mrws; + /** MPT entry size */ + size_t mpt_entry_size; + /** Number of reserved RDBs */ + unsigned int reserved_rdbs; + /** EQ context entry size */ + size_t eqc_entry_size; + /** Number of reserved UARs */ + unsigned int reserved_uars; +}; + +/** Alignment of Arbel send work queue entries */ +#define ARBEL_SEND_WQE_ALIGN 128 + +/** An Arbel send work queue entry */ +union arbel_send_wqe { + struct arbelprm_ud_send_wqe ud; + uint8_t force_align[ARBEL_SEND_WQE_ALIGN]; +} __attribute__ (( packed )); + +/** An Arbel send work queue */ +struct arbel_send_work_queue { + /** Doorbell record number */ + unsigned int doorbell_idx; + /** Work queue entries */ + union arbel_send_wqe *wqe; + /** Size of work queue */ + size_t wqe_size; +}; + +/** Alignment of Arbel receive work queue entries */ +#define ARBEL_RECV_WQE_ALIGN 64 + +/** An Arbel receive work queue entry */ +union arbel_recv_wqe { + struct arbelprm_recv_wqe recv; + uint8_t force_align[ARBEL_RECV_WQE_ALIGN]; +} __attribute__ (( packed )); + +/** An Arbel receive work queue */ +struct arbel_recv_work_queue { + /** Doorbell record number */ + unsigned int doorbell_idx; + /** Work queue entries */ + union arbel_recv_wqe *wqe; + /** Size of work queue */ + size_t wqe_size; +}; + +/** Maximum number of allocatable queue pairs + * + * This is a policy decision, not a device limit. + */ +#define ARBEL_MAX_QPS 8 + +/** Base queue pair number */ +#define ARBEL_QPN_BASE 0x550000 + +/** An Arbel queue pair */ +struct arbel_queue_pair { + /** Send work queue */ + struct arbel_send_work_queue send; + /** Receive work queue */ + struct arbel_recv_work_queue recv; +}; + +/** Maximum number of allocatable completion queues + * + * This is a policy decision, not a device limit. 
+ */ +#define ARBEL_MAX_CQS 8 + +/** An Arbel completion queue */ +struct arbel_completion_queue { + /** Consumer counter doorbell record number */ + unsigned int ci_doorbell_idx; + /** Arm queue doorbell record number */ + unsigned int arm_doorbell_idx; + /** Completion queue entries */ + union arbelprm_completion_entry *cqe; + /** Size of completion queue */ + size_t cqe_size; +}; + +/** An Arbel resource bitmask */ +typedef uint32_t arbel_bitmask_t; + +/** Size of an Arbel resource bitmask */ +#define ARBEL_BITMASK_SIZE(max_entries) \ + ( ( (max_entries) + ( 8 * sizeof ( arbel_bitmask_t ) ) - 1 ) / \ + ( 8 * sizeof ( arbel_bitmask_t ) ) ) + +/** An Arbel device */ +struct arbel { + /** PCI configuration registers */ + void *config; + /** PCI user Access Region */ + void *uar; + + /** Command input mailbox */ + void *mailbox_in; + /** Command output mailbox */ + void *mailbox_out; + + /** Firmware area in external memory */ + userptr_t firmware_area; + /** ICM size */ + size_t icm_len; + /** ICM AUX size */ + size_t icm_aux_len; + /** ICM area */ + userptr_t icm; + + /** Doorbell records */ + union arbelprm_doorbell_record *db_rec; + /** Reserved LKey + * + * Used to get unrestricted memory access. + */ + unsigned long reserved_lkey; + + /** Completion queue in-use bitmask */ + arbel_bitmask_t cq_inuse[ ARBEL_BITMASK_SIZE ( ARBEL_MAX_CQS ) ]; + /** Queue pair in-use bitmask */ + arbel_bitmask_t qp_inuse[ ARBEL_BITMASK_SIZE ( ARBEL_MAX_QPS ) ]; + + /** Device limits */ + struct arbel_dev_limits limits; +}; + +/** Global protection domain */ +#define ARBEL_GLOBAL_PD 0x123456 + +/** Memory key prefix */ +#define ARBEL_MKEY_PREFIX 0x77000000UL + +/* + * HCA commands + * + */ + +#define ARBEL_HCR_BASE 0x80680 +#define ARBEL_HCR_REG(x) ( ARBEL_HCR_BASE + 4 * (x) ) +#define ARBEL_HCR_MAX_WAIT_MS 2000 +#define ARBEL_MBOX_ALIGN 4096 +#define ARBEL_MBOX_SIZE 512 + +/* HCA command is split into + * + * bits 11:0 Opcode + * bit 12 Input uses mailbox + * bit 13 Output uses mailbox + * bits 22:14 Input parameter length (in dwords) + * bits 31:23 Output parameter length (in dwords) + * + * Encoding the information in this way allows us to cut out several + * parameters to the arbel_command() call. + */ +#define ARBEL_HCR_IN_MBOX 0x00001000UL +#define ARBEL_HCR_OUT_MBOX 0x00002000UL +#define ARBEL_HCR_OPCODE( _command ) ( (_command) & 0xfff ) +#define ARBEL_HCR_IN_LEN( _command ) ( ( (_command) >> 12 ) & 0x7fc ) +#define ARBEL_HCR_OUT_LEN( _command ) ( ( (_command) >> 21 ) & 0x7fc ) + +/** Build HCR command from component parts */ +#define ARBEL_HCR_INOUT_CMD( _opcode, _in_mbox, _in_len, \ + _out_mbox, _out_len ) \ + ( (_opcode) | \ + ( (_in_mbox) ? ARBEL_HCR_IN_MBOX : 0 ) | \ + ( ( (_in_len) / 4 ) << 14 ) | \ + ( (_out_mbox) ? ARBEL_HCR_OUT_MBOX : 0 ) | \ + ( ( (_out_len) / 4 ) << 23 ) ) + +#define ARBEL_HCR_IN_CMD( _opcode, _in_mbox, _in_len ) \ + ARBEL_HCR_INOUT_CMD ( _opcode, _in_mbox, _in_len, 0, 0 ) + +#define ARBEL_HCR_OUT_CMD( _opcode, _out_mbox, _out_len ) \ + ARBEL_HCR_INOUT_CMD ( _opcode, 0, 0, _out_mbox, _out_len ) + +#define ARBEL_HCR_VOID_CMD( _opcode ) \ + ARBEL_HCR_INOUT_CMD ( _opcode, 0, 0, 0, 0 ) + +/* + * Doorbell record allocation + * + * The doorbell record map looks like: + * + * ARBEL_MAX_CQS * Arm completion queue doorbell + * ARBEL_MAX_QPS * Send work request doorbell + * Group separator + * ...(empty space)... 
+ * ARBEL_MAX_QPS * Receive work request doorbell + * ARBEL_MAX_CQS * Completion queue consumer counter update doorbell + */ + +#define ARBEL_MAX_DOORBELL_RECORDS 512 +#define ARBEL_GROUP_SEPARATOR_DOORBELL ( ARBEL_MAX_CQS + ARBEL_MAX_QPS ) + +/** + * Get arm completion queue doorbell index + * + * @v cqn_offset Completion queue number offset + * @ret doorbell_idx Doorbell index + */ +static inline unsigned int +arbel_cq_arm_doorbell_idx ( unsigned int cqn_offset ) { + return cqn_offset; +} + +/** + * Get send work request doorbell index + * + * @v qpn_offset Queue pair number offset + * @ret doorbell_idx Doorbell index + */ +static inline unsigned int +arbel_send_doorbell_idx ( unsigned int qpn_offset ) { + return ( ARBEL_MAX_CQS + qpn_offset ); +} + +/** + * Get receive work request doorbell index + * + * @v qpn_offset Queue pair number offset + * @ret doorbell_idx Doorbell index + */ +static inline unsigned int +arbel_recv_doorbell_idx ( unsigned int qpn_offset ) { + return ( ARBEL_MAX_DOORBELL_RECORDS - ARBEL_MAX_CQS - qpn_offset - 1 ); +} + +/** + * Get completion queue consumer counter doorbell index + * + * @v cqn_offset Completion queue number offset + * @ret doorbell_idx Doorbell index + */ +static inline unsigned int +arbel_cq_ci_doorbell_idx ( unsigned int cqn_offset ) { + return ( ARBEL_MAX_DOORBELL_RECORDS - cqn_offset - 1 ); +} + +#endif /* _ARBEL_H */ diff --git a/src/drivers/infiniband/mlx_bitops.h b/src/drivers/infiniband/mlx_bitops.h new file mode 100644 index 00000000..ec57d7b0 --- /dev/null +++ b/src/drivers/infiniband/mlx_bitops.h @@ -0,0 +1,209 @@ +#ifndef _MLX_BITOPS_H +#define _MLX_BITOPS_H + +/* + * Copyright (C) 2007 Michael Brown . + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/** + * @file + * + * Mellanox bit operations + * + */ + +/* Datatype used to represent a bit in the Mellanox autogenerated headers */ +typedef unsigned char pseudo_bit_t; + +/** + * Wrapper structure for pseudo_bit_t structures + * + * This structure provides a wrapper around the autogenerated + * pseudo_bit_t structures. It has the correct size, and also + * encapsulates type information about the underlying pseudo_bit_t + * structure, which allows the MLX_FILL etc. macros to work without + * requiring explicit type information. 
+ */ +#define MLX_DECLARE_STRUCT( _structure ) \ + _structure { \ + union { \ + uint8_t bytes[ sizeof ( struct _structure ## _st ) / 8 ]; \ + uint32_t dwords[ sizeof ( struct _structure ## _st ) / 32 ]; \ + struct _structure ## _st *dummy[0]; \ + } u; \ + } + +/** Get pseudo_bit_t structure type from wrapper structure pointer */ +#define MLX_PSEUDO_STRUCT( _ptr ) \ + typeof ( *((_ptr)->u.dummy[0]) ) + +/** Bit offset of a field within a pseudo_bit_t structure */ +#define MLX_BIT_OFFSET( _structure_st, _field ) \ + offsetof ( _structure_st, _field ) + +/** Dword offset of a field within a pseudo_bit_t structure */ +#define MLX_DWORD_OFFSET( _structure_st, _field ) \ + ( MLX_BIT_OFFSET ( _structure_st, _field ) / 32 ) + +/** Dword bit offset of a field within a pseudo_bit_t structure + * + * Yes, using mod-32 would work, but would lose the check for the + * error of specifying a mismatched field name and dword index. + */ +#define MLX_DWORD_BIT_OFFSET( _structure_st, _index, _field ) \ + ( MLX_BIT_OFFSET ( _structure_st, _field ) - ( 32 * (_index) ) ) + +/** Bit width of a field within a pseudo_bit_t structure */ +#define MLX_BIT_WIDTH( _structure_st, _field ) \ + sizeof ( ( ( _structure_st * ) NULL )->_field ) + +/** Bit mask for a field within a pseudo_bit_t structure */ +#define MLX_BIT_MASK( _structure_st, _field ) \ + ( ( ~( ( uint32_t ) 0 ) ) >> \ + ( 32 - MLX_BIT_WIDTH ( _structure_st, _field ) ) ) + +/* + * Assemble native-endian dword from named fields and values + * + */ + +#define MLX_ASSEMBLE_1( _structure_st, _index, _field, _value ) \ + ( (_value) << MLX_DWORD_BIT_OFFSET ( _structure_st, _index, _field ) ) + +#define MLX_ASSEMBLE_2( _structure_st, _index, _field, _value, ... ) \ + ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ + MLX_ASSEMBLE_1 ( _structure_st, _index, __VA_ARGS__ ) ) + +#define MLX_ASSEMBLE_3( _structure_st, _index, _field, _value, ... ) \ + ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ + MLX_ASSEMBLE_2 ( _structure_st, _index, __VA_ARGS__ ) ) + +#define MLX_ASSEMBLE_4( _structure_st, _index, _field, _value, ... ) \ + ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ + MLX_ASSEMBLE_3 ( _structure_st, _index, __VA_ARGS__ ) ) + +#define MLX_ASSEMBLE_5( _structure_st, _index, _field, _value, ... ) \ + ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ + MLX_ASSEMBLE_4 ( _structure_st, _index, __VA_ARGS__ ) ) + +#define MLX_ASSEMBLE_6( _structure_st, _index, _field, _value, ... ) \ + ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ + MLX_ASSEMBLE_5 ( _structure_st, _index, __VA_ARGS__ ) ) + +/* + * Build native-endian (positive) dword bitmasks from named fields + * + */ + +#define MLX_MASK_1( _structure_st, _index, _field ) \ + ( MLX_BIT_MASK ( _structure_st, _field ) << \ + MLX_DWORD_BIT_OFFSET ( _structure_st, _index, _field ) ) + +#define MLX_MASK_2( _structure_st, _index, _field, ... ) \ + ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ + MLX_MASK_1 ( _structure_st, _index, __VA_ARGS__ ) ) + +#define MLX_MASK_3( _structure_st, _index, _field, ... ) \ + ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ + MLX_MASK_2 ( _structure_st, _index, __VA_ARGS__ ) ) + +#define MLX_MASK_4( _structure_st, _index, _field, ... ) \ + ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ + MLX_MASK_3 ( _structure_st, _index, __VA_ARGS__ ) ) + +#define MLX_MASK_5( _structure_st, _index, _field, ... 
) \ + ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ + MLX_MASK_4 ( _structure_st, _index, __VA_ARGS__ ) ) + +#define MLX_MASK_6( _structure_st, _index, _field, ... ) \ + ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ + MLX_MASK_5 ( _structure_st, _index, __VA_ARGS__ ) ) + +/* + * Populate big-endian dwords from named fields and values + * + */ + +#define MLX_FILL( _ptr, _index, _assembled ) \ + do { \ + uint32_t *__ptr = &(_ptr)->u.dwords[(_index)]; \ + uint32_t __assembled = (_assembled); \ + *__ptr = cpu_to_be32 ( __assembled ); \ + } while ( 0 ) + +#define MLX_FILL_1( _ptr, _index, ... ) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_1 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) + +#define MLX_FILL_2( _ptr, _index, ... ) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_2 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) + +#define MLX_FILL_3( _ptr, _index, ... ) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_3 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) + +#define MLX_FILL_4( _ptr, _index, ... ) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_4 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) + +#define MLX_FILL_5( _ptr, _index, ... ) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_5 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) + +#define MLX_FILL_6( _ptr, _index, ... ) \ + MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_6 ( MLX_PSEUDO_STRUCT ( _ptr ),\ + _index, __VA_ARGS__ ) ) + +/* + * Modify big-endian dword using named field and value + * + */ + +#define MLX_SET( _ptr, _field, _value ) \ + do { \ + unsigned int __index = \ + MLX_DWORD_OFFSET ( MLX_PSEUDO_STRUCT ( _ptr ), _field ); \ + uint32_t *__ptr = &(_ptr)->u.dwords[__index]; \ + uint32_t __value = be32_to_cpu ( *__ptr ); \ + __value &= ~( MLX_MASK_1 ( MLX_PSEUDO_STRUCT ( _ptr ), \ + __index, _field ) ); \ + __value |= MLX_ASSEMBLE_1 ( MLX_PSEUDO_STRUCT ( _ptr ), \ + __index, _field, _value ); \ + *__ptr = cpu_to_be32 ( __value ); \ + } while ( 0 ) + +/* + * Extract value of named field + * + */ + +#define MLX_GET( _ptr, _field ) \ + ( { \ + unsigned int __index = \ + MLX_DWORD_OFFSET ( MLX_PSEUDO_STRUCT ( _ptr ), _field ); \ + uint32_t *__ptr = &(_ptr)->u.dwords[__index]; \ + uint32_t __value = be32_to_cpu ( *__ptr ); \ + __value >>= \ + MLX_DWORD_BIT_OFFSET ( MLX_PSEUDO_STRUCT ( _ptr ), \ + __index, _field ); \ + __value &= \ + MLX_BIT_MASK ( MLX_PSEUDO_STRUCT ( _ptr ), _field ); \ + __value; \ + } ) + +#endif /* _MLX_BITOPS_H */ From 95498fa038e6a3bfc88a1efeec2eb64fe0de6332 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 18 Sep 2007 06:26:31 +0100 Subject: [PATCH 77/84] Remove the last remaining visible hack. 
--- src/drivers/infiniband/arbel.c | 19 +++++++++---------- src/drivers/infiniband/arbel.h | 2 ++ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/drivers/infiniband/arbel.c b/src/drivers/infiniband/arbel.c index 51246d7e..9ba81ca7 100644 --- a/src/drivers/infiniband/arbel.c +++ b/src/drivers/infiniband/arbel.c @@ -1692,6 +1692,7 @@ static int arbel_alloc_icm ( struct arbel *arbel, struct arbelprm_scalar_parameter icm_aux_size; struct arbelprm_virtual_physical_mapping map_icm_aux; struct arbelprm_virtual_physical_mapping map_icm; + union arbelprm_doorbell_record *db_rec; size_t icm_offset = 0; unsigned int log_num_qps, log_num_srqs, log_num_ees, log_num_cqs; unsigned int log_num_mtts, log_num_mpts, log_num_rdbs, log_num_eqs; @@ -1848,6 +1849,14 @@ static int arbel_alloc_icm ( struct arbel *arbel, goto err_map_icm; } + /* Initialise UAR context */ + arbel->db_rec = phys_to_virt ( user_to_phys ( arbel->icm, 0 ) + + ( arbel->limits.reserved_uars * + ARBEL_PAGE_SIZE ) ); + memset ( arbel->db_rec, 0, ARBEL_PAGE_SIZE ); + db_rec = &arbel->db_rec[ARBEL_GROUP_SEPARATOR_DOORBELL]; + MLX_FILL_1 ( &db_rec->qp, 1, res, ARBEL_UAR_RES_GROUP_SEP ); + return 0; arbel_cmd_unmap_icm ( arbel, ( arbel->icm_len / 4096 ) ); @@ -2026,16 +2035,6 @@ static int arbel_probe ( struct pci_device *pci, if ( ( rc = arbel_alloc_icm ( arbel, &init_hca ) ) != 0 ) goto err_alloc_icm; - - unsigned long uar_offset = ( arbel->limits.reserved_uars * 4096 ); - arbel->db_rec = phys_to_virt ( user_to_phys ( arbel->icm, - uar_offset ) ); - memset ( arbel->db_rec, 0, 4096 ); - union arbelprm_doorbell_record *db_rec; - db_rec = &arbel->db_rec[ARBEL_GROUP_SEPARATOR_DOORBELL]; - MLX_FILL_1 ( &db_rec->qp, 1, res, ARBEL_UAR_RES_GROUP_SEP ); - - /* Initialise HCA */ MLX_FILL_1 ( &init_hca, 74, uar_parameters.log_max_uars, 1 ); if ( ( rc = arbel_cmd_init_hca ( arbel, &init_hca ) ) != 0 ) { diff --git a/src/drivers/infiniband/arbel.h b/src/drivers/infiniband/arbel.h index 4d2c740f..4d7e4fc6 100644 --- a/src/drivers/infiniband/arbel.h +++ b/src/drivers/infiniband/arbel.h @@ -79,6 +79,8 @@ #define ARBEL_INVALID_LKEY 0x00000100UL +#define ARBEL_PAGE_SIZE 4096 + #define ARBEL_DB_POST_SND_OFFSET 0x10 /* From 2f2388185641596ddedb608c5463dae997723db1 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 18 Sep 2007 06:37:22 +0100 Subject: [PATCH 78/84] Added arbel.c --- src/include/gpxe/errfile.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/gpxe/errfile.h b/src/include/gpxe/errfile.h index 325d2387..1b0ed33f 100644 --- a/src/include/gpxe/errfile.h +++ b/src/include/gpxe/errfile.h @@ -102,9 +102,9 @@ #define ERRFILE_via_velocity ( ERRFILE_DRIVER | 0x00450000 ) #define ERRFILE_w89c840 ( ERRFILE_DRIVER | 0x00460000 ) #define ERRFILE_ipoib ( ERRFILE_DRIVER | 0x00470000 ) -#define ERRFILE_mt25218 ( ERRFILE_DRIVER | 0x00480000 ) #define ERRFILE_scsi ( ERRFILE_DRIVER | 0x00700000 ) +#define ERRFILE_arbel ( ERRFILE_DRIVER | 0x00710000 ) #define ERRFILE_aoe ( ERRFILE_NET | 0x00000000 ) #define ERRFILE_arp ( ERRFILE_NET | 0x00010000 ) From 30717896f5b0c8d42860c928440ccb3eb0a559cc Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 18 Sep 2007 07:02:41 +0100 Subject: [PATCH 79/84] IB can't afford many RX buffers, because the MTU is so large. Reduce the TCP window to compensate. 
--- src/include/gpxe/tcp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/include/gpxe/tcp.h b/src/include/gpxe/tcp.h index d967791f..e2753120 100644 --- a/src/include/gpxe/tcp.h +++ b/src/include/gpxe/tcp.h @@ -275,7 +275,8 @@ struct tcp_options { * actually use 65536, we use a window size of (65536-4) to ensure * that payloads remain dword-aligned. */ -#define TCP_MAX_WINDOW_SIZE ( 65536 - 4 ) +//#define TCP_MAX_WINDOW_SIZE ( 65536 - 4 ) +#define TCP_MAX_WINDOW_SIZE 4096 /** * Path MTU From 0f60150c44f3e6d70baa88050f2779ae25e403c5 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Tue, 18 Sep 2007 07:07:04 +0100 Subject: [PATCH 80/84] Reverted driver/net/mlx_ipoib to clean master state --- src/drivers/net/mlx_ipoib/arbel.h | 455 ---- src/drivers/net/mlx_ipoib/bit_ops.h | 191 -- src/drivers/net/mlx_ipoib/cmdif_comm.c | 30 +- src/drivers/net/mlx_ipoib/cmdif_mt25218.c | 51 +- .../net/mlx_ipoib/doc/README.boot_over_ib | 20 +- src/drivers/net/mlx_ipoib/ib_driver.c | 32 +- src/drivers/net/mlx_ipoib/ib_driver.h | 12 +- src/drivers/net/mlx_ipoib/ib_mad.c | 7 +- src/drivers/net/mlx_ipoib/ib_mad.h | 2 +- src/drivers/net/mlx_ipoib/ib_mt23108.c | 64 +- src/drivers/net/mlx_ipoib/ib_mt25218.c | 125 +- src/drivers/net/mlx_ipoib/ipoib.c | 68 +- src/drivers/net/mlx_ipoib/mt23108.c | 37 +- src/drivers/net/mlx_ipoib/mt23108_imp.c | 6 +- src/drivers/net/mlx_ipoib/mt25218.c | 2313 ++--------------- src/drivers/net/mlx_ipoib/mt25218.h | 50 +- src/drivers/net/mlx_ipoib/mt25218_imp.c | 8 +- src/drivers/net/mlx_ipoib/mt_version.c | 2 +- src/drivers/net/mlx_ipoib/patches/dhcpd.patch | 18 +- 19 files changed, 384 insertions(+), 3107 deletions(-) delete mode 100644 src/drivers/net/mlx_ipoib/arbel.h diff --git a/src/drivers/net/mlx_ipoib/arbel.h b/src/drivers/net/mlx_ipoib/arbel.h deleted file mode 100644 index dcca5107..00000000 --- a/src/drivers/net/mlx_ipoib/arbel.h +++ /dev/null @@ -1,455 +0,0 @@ -#ifndef _ARBEL_H -#define _ARBEL_H - -/** @file - * - * Mellanox Arbel Infiniband HCA driver - * - */ - -#include -#include - -/* - * Hardware constants - * - */ - -/* PCI BARs */ -#define ARBEL_PCI_CONFIG_BAR PCI_BASE_ADDRESS_0 -#define ARBEL_PCI_CONFIG_BAR_SIZE 0x100000 -#define ARBEL_PCI_UAR_BAR PCI_BASE_ADDRESS_2 -#define ARBEL_PCI_UAR_IDX 1 -#define ARBEL_PCI_UAR_SIZE 0x1000 - -/* UAR context table (UCE) resource types */ -#define ARBEL_UAR_RES_NONE 0x00 -#define ARBEL_UAR_RES_CQ_CI 0x01 -#define ARBEL_UAR_RES_CQ_ARM 0x02 -#define ARBEL_UAR_RES_SQ 0x03 -#define ARBEL_UAR_RES_RQ 0x04 -#define ARBEL_UAR_RES_GROUP_SEP 0x07 - -/* Work queue entry and completion queue entry opcodes */ -#define ARBEL_OPCODE_SEND 0x0a -#define ARBEL_OPCODE_RECV_ERROR 0xfe -#define ARBEL_OPCODE_SEND_ERROR 0xff - -/* HCA command register opcodes */ -#define ARBEL_HCR_QUERY_DEV_LIM 0x0003 -#define ARBEL_HCR_QUERY_FW 0x0004 -#define ARBEL_HCR_INIT_HCA 0x0007 -#define ARBEL_HCR_CLOSE_HCA 0x0008 -#define ARBEL_HCR_INIT_IB 0x0009 -#define ARBEL_HCR_CLOSE_IB 0x000a -#define ARBEL_HCR_SW2HW_MPT 0x000d -#define ARBEL_HCR_MAP_EQ 0x0012 -#define ARBEL_HCR_SW2HW_EQ 0x0013 -#define ARBEL_HCR_HW2SW_EQ 0x0014 -#define ARBEL_HCR_SW2HW_CQ 0x0016 -#define ARBEL_HCR_HW2SW_CQ 0x0017 -#define ARBEL_HCR_RST2INIT_QPEE 0x0019 -#define ARBEL_HCR_INIT2RTR_QPEE 0x001a -#define ARBEL_HCR_RTR2RTS_QPEE 0x001b -#define ARBEL_HCR_2RST_QPEE 0x0021 -#define ARBEL_HCR_MAD_IFC 0x0024 -#define ARBEL_HCR_READ_MGM 0x0025 -#define ARBEL_HCR_WRITE_MGM 0x0026 -#define ARBEL_HCR_MGID_HASH 0x0027 -#define ARBEL_HCR_RUN_FW 0x0ff6 -#define 
ARBEL_HCR_DISABLE_LAM 0x0ff7 -#define ARBEL_HCR_ENABLE_LAM 0x0ff8 -#define ARBEL_HCR_UNMAP_ICM 0x0ff9 -#define ARBEL_HCR_MAP_ICM 0x0ffa -#define ARBEL_HCR_UNMAP_ICM_AUX 0x0ffb -#define ARBEL_HCR_MAP_ICM_AUX 0x0ffc -#define ARBEL_HCR_SET_ICM_SIZE 0x0ffd -#define ARBEL_HCR_UNMAP_FA 0x0ffe -#define ARBEL_HCR_MAP_FA 0x0fff - -/* Service types */ -#define ARBEL_ST_UD 0x03 - -/* MTUs */ -#define ARBEL_MTU_2048 0x04 - -#define ARBEL_NO_EQ 64 - -#define ARBEL_INVALID_LKEY 0x00000100UL - -/* - * Datatypes that seem to be missing from the autogenerated documentation - * - */ -struct arbelprm_mgm_hash_st { - pseudo_bit_t reserved0[0x00020]; -/* -------------- */ - pseudo_bit_t hash[0x00010]; - pseudo_bit_t reserved1[0x00010]; -} __attribute__ (( packed )); - -struct arbelprm_scalar_parameter_st { - pseudo_bit_t reserved0[0x00020]; -/* -------------- */ - pseudo_bit_t value[0x00020]; -} __attribute__ (( packed )); - -/* - * Wrapper structures for hardware datatypes - * - */ - -struct MLX_DECLARE_STRUCT ( arbelprm_access_lam ); -struct MLX_DECLARE_STRUCT ( arbelprm_completion_queue_context ); -struct MLX_DECLARE_STRUCT ( arbelprm_completion_queue_entry ); -struct MLX_DECLARE_STRUCT ( arbelprm_completion_with_error ); -struct MLX_DECLARE_STRUCT ( arbelprm_cq_arm_db_record ); -struct MLX_DECLARE_STRUCT ( arbelprm_cq_ci_db_record ); -struct MLX_DECLARE_STRUCT ( arbelprm_eqc ); -struct MLX_DECLARE_STRUCT ( arbelprm_hca_command_register ); -struct MLX_DECLARE_STRUCT ( arbelprm_init_hca ); -struct MLX_DECLARE_STRUCT ( arbelprm_init_ib ); -struct MLX_DECLARE_STRUCT ( arbelprm_mad_ifc ); -struct MLX_DECLARE_STRUCT ( arbelprm_mgm_entry ); -struct MLX_DECLARE_STRUCT ( arbelprm_mgm_hash ); -struct MLX_DECLARE_STRUCT ( arbelprm_mpt ); -struct MLX_DECLARE_STRUCT ( arbelprm_qp_db_record ); -struct MLX_DECLARE_STRUCT ( arbelprm_qp_ee_state_transitions ); -struct MLX_DECLARE_STRUCT ( arbelprm_query_dev_lim ); -struct MLX_DECLARE_STRUCT ( arbelprm_query_fw ); -struct MLX_DECLARE_STRUCT ( arbelprm_queue_pair_ee_context_entry ); -struct MLX_DECLARE_STRUCT ( arbelprm_recv_wqe_segment_next ); -struct MLX_DECLARE_STRUCT ( arbelprm_scalar_parameter ); -struct MLX_DECLARE_STRUCT ( arbelprm_send_doorbell ); -struct MLX_DECLARE_STRUCT ( arbelprm_ud_address_vector ); -struct MLX_DECLARE_STRUCT ( arbelprm_virtual_physical_mapping ); -struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_ctrl_send ); -struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_data_ptr ); -struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_next ); -struct MLX_DECLARE_STRUCT ( arbelprm_wqe_segment_ud ); - -/* - * Composite hardware datatypes - * - */ - -#define ARBEL_MAX_GATHER 1 - -struct arbelprm_ud_send_wqe { - struct arbelprm_wqe_segment_next next; - struct arbelprm_wqe_segment_ctrl_send ctrl; - struct arbelprm_wqe_segment_ud ud; - struct arbelprm_wqe_segment_data_ptr data[ARBEL_MAX_GATHER]; -} __attribute__ (( packed )); - -#define ARBEL_MAX_SCATTER 1 - -struct arbelprm_recv_wqe { - /* The autogenerated header is inconsistent between send and - * receive WQEs. The "ctrl" structure for receive WQEs is - * defined to include the "next" structure. Since the "ctrl" - * part of the "ctrl" structure contains only "reserved, must - * be zero" bits, we ignore its definition and provide - * something more usable. 
- */ - struct arbelprm_recv_wqe_segment_next next; - uint32_t ctrl[2]; /* All "reserved, must be zero" */ - struct arbelprm_wqe_segment_data_ptr data[ARBEL_MAX_SCATTER]; -} __attribute__ (( packed )); - -union arbelprm_completion_entry { - struct arbelprm_completion_queue_entry normal; - struct arbelprm_completion_with_error error; -} __attribute__ (( packed )); - -union arbelprm_doorbell_record { - struct arbelprm_cq_arm_db_record cq_arm; - struct arbelprm_cq_ci_db_record cq_ci; - struct arbelprm_qp_db_record qp; -} __attribute__ (( packed )); - -union arbelprm_doorbell_register { - struct arbelprm_send_doorbell send; - uint32_t dword[2]; -} __attribute__ (( packed )); - -union arbelprm_mad { - struct arbelprm_mad_ifc ifc; - union ib_mad mad; -} __attribute__ (( packed )); - -/* - * gPXE-specific definitions - * - */ - -/** Arbel device limits */ -struct arbel_dev_limits { - /** Number of reserved QPs */ - unsigned int reserved_qps; - /** QP context entry size */ - size_t qpc_entry_size; - /** Extended QP context entry size */ - size_t eqpc_entry_size; - /** Number of reserved SRQs */ - unsigned int reserved_srqs; - /** SRQ context entry size */ - size_t srqc_entry_size; - /** Number of reserved EEs */ - unsigned int reserved_ees; - /** EE context entry size */ - size_t eec_entry_size; - /** Extended EE context entry size */ - size_t eeec_entry_size; - /** Number of reserved CQs */ - unsigned int reserved_cqs; - /** CQ context entry size */ - size_t cqc_entry_size; - /** Number of reserved MTTs */ - unsigned int reserved_mtts; - /** MTT entry size */ - size_t mtt_entry_size; - /** Number of reserved MRWs */ - unsigned int reserved_mrws; - /** MPT entry size */ - size_t mpt_entry_size; - /** Number of reserved RDBs */ - unsigned int reserved_rdbs; - /** EQ context entry size */ - size_t eqc_entry_size; - /** Number of reserved UARs */ - unsigned int reserved_uars; -}; - -/** Alignment of Arbel send work queue entries */ -#define ARBEL_SEND_WQE_ALIGN 128 - -/** An Arbel send work queue entry */ -union arbel_send_wqe { - struct arbelprm_ud_send_wqe ud; - uint8_t force_align[ARBEL_SEND_WQE_ALIGN]; -} __attribute__ (( packed )); - -/** An Arbel send work queue */ -struct arbel_send_work_queue { - /** Doorbell record number */ - unsigned int doorbell_idx; - /** Work queue entries */ - union arbel_send_wqe *wqe; - /** Size of work queue */ - size_t wqe_size; -}; - -/** Alignment of Arbel receive work queue entries */ -#define ARBEL_RECV_WQE_ALIGN 64 - -/** An Arbel receive work queue entry */ -union arbel_recv_wqe { - struct arbelprm_recv_wqe recv; - uint8_t force_align[ARBEL_RECV_WQE_ALIGN]; -} __attribute__ (( packed )); - -/** An Arbel receive work queue */ -struct arbel_recv_work_queue { - /** Doorbell record number */ - unsigned int doorbell_idx; - /** Work queue entries */ - union arbel_recv_wqe *wqe; - /** Size of work queue */ - size_t wqe_size; -}; - -/** Maximum number of allocatable queue pairs - * - * This is a policy decision, not a device limit. - */ -#define ARBEL_MAX_QPS 8 - -/** Base queue pair number */ -#define ARBEL_QPN_BASE 0x550000 - -/** An Arbel queue pair */ -struct arbel_queue_pair { - /** Send work queue */ - struct arbel_send_work_queue send; - /** Receive work queue */ - struct arbel_recv_work_queue recv; -}; - -/** Maximum number of allocatable completion queues - * - * This is a policy decision, not a device limit. 
- */ -#define ARBEL_MAX_CQS 8 - -/** An Arbel completion queue */ -struct arbel_completion_queue { - /** Consumer counter doorbell record number */ - unsigned int ci_doorbell_idx; - /** Arm queue doorbell record number */ - unsigned int arm_doorbell_idx; - /** Completion queue entries */ - union arbelprm_completion_entry *cqe; - /** Size of completion queue */ - size_t cqe_size; -}; - -/** An Arbel resource bitmask */ -typedef uint32_t arbel_bitmask_t; - -/** Size of an Arbel resource bitmask */ -#define ARBEL_BITMASK_SIZE(max_entries) \ - ( ( (max_entries) + ( 8 * sizeof ( arbel_bitmask_t ) ) - 1 ) / \ - ( 8 * sizeof ( arbel_bitmask_t ) ) ) - -/** An Arbel device */ -struct arbel { - /** PCI configuration registers */ - void *config; - /** PCI user Access Region */ - void *uar; - - /** Command input mailbox */ - void *mailbox_in; - /** Command output mailbox */ - void *mailbox_out; - - /** Firmware area in external memory */ - userptr_t firmware_area; - /** ICM size */ - size_t icm_len; - /** ICM AUX size */ - size_t icm_aux_len; - /** ICM area */ - userptr_t icm; - - /** Doorbell records */ - union arbelprm_doorbell_record *db_rec; - /** Reserved LKey - * - * Used to get unrestricted memory access. - */ - unsigned long reserved_lkey; - - /** Completion queue in-use bitmask */ - arbel_bitmask_t cq_inuse[ ARBEL_BITMASK_SIZE ( ARBEL_MAX_CQS ) ]; - /** Queue pair in-use bitmask */ - arbel_bitmask_t qp_inuse[ ARBEL_BITMASK_SIZE ( ARBEL_MAX_QPS ) ]; - - /** Device limits */ - struct arbel_dev_limits limits; -}; - -/** Global protection domain */ -#define ARBEL_GLOBAL_PD 0x123456 - -/** Memory key prefix */ -#define ARBEL_MKEY_PREFIX 0x77000000UL - -/* - * HCA commands - * - */ - -#define ARBEL_HCR_BASE 0x80680 -#define ARBEL_HCR_REG(x) ( ARBEL_HCR_BASE + 4 * (x) ) -#define ARBEL_HCR_MAX_WAIT_MS 2000 -#define ARBEL_MBOX_ALIGN 4096 -#define ARBEL_MBOX_SIZE 512 - -/* HCA command is split into - * - * bits 11:0 Opcode - * bit 12 Input uses mailbox - * bit 13 Output uses mailbox - * bits 22:14 Input parameter length (in dwords) - * bits 31:23 Output parameter length (in dwords) - * - * Encoding the information in this way allows us to cut out several - * parameters to the arbel_command() call. - */ -#define ARBEL_HCR_IN_MBOX 0x00001000UL -#define ARBEL_HCR_OUT_MBOX 0x00002000UL -#define ARBEL_HCR_OPCODE( _command ) ( (_command) & 0xfff ) -#define ARBEL_HCR_IN_LEN( _command ) ( ( (_command) >> 12 ) & 0x7fc ) -#define ARBEL_HCR_OUT_LEN( _command ) ( ( (_command) >> 21 ) & 0x7fc ) - -/** Build HCR command from component parts */ -#define ARBEL_HCR_INOUT_CMD( _opcode, _in_mbox, _in_len, \ - _out_mbox, _out_len ) \ - ( (_opcode) | \ - ( (_in_mbox) ? ARBEL_HCR_IN_MBOX : 0 ) | \ - ( ( (_in_len) / 4 ) << 14 ) | \ - ( (_out_mbox) ? ARBEL_HCR_OUT_MBOX : 0 ) | \ - ( ( (_out_len) / 4 ) << 23 ) ) - -#define ARBEL_HCR_IN_CMD( _opcode, _in_mbox, _in_len ) \ - ARBEL_HCR_INOUT_CMD ( _opcode, _in_mbox, _in_len, 0, 0 ) - -#define ARBEL_HCR_OUT_CMD( _opcode, _out_mbox, _out_len ) \ - ARBEL_HCR_INOUT_CMD ( _opcode, 0, 0, _out_mbox, _out_len ) - -#define ARBEL_HCR_VOID_CMD( _opcode ) \ - ARBEL_HCR_INOUT_CMD ( _opcode, 0, 0, 0, 0 ) - -/* - * Doorbell record allocation - * - * The doorbell record map looks like: - * - * ARBEL_MAX_CQS * Arm completion queue doorbell - * ARBEL_MAX_QPS * Send work request doorbell - * Group separator - * ...(empty space)... 
- * ARBEL_MAX_QPS * Receive work request doorbell - * ARBEL_MAX_CQS * Completion queue consumer counter update doorbell - */ - -#define ARBEL_MAX_DOORBELL_RECORDS 512 -#define ARBEL_GROUP_SEPARATOR_DOORBELL ( ARBEL_MAX_CQS + ARBEL_MAX_QPS ) - -/** - * Get arm completion queue doorbell index - * - * @v cqn_offset Completion queue number offset - * @ret doorbell_idx Doorbell index - */ -static inline unsigned int -arbel_cq_arm_doorbell_idx ( unsigned int cqn_offset ) { - return cqn_offset; -} - -/** - * Get send work request doorbell index - * - * @v qpn_offset Queue pair number offset - * @ret doorbell_idx Doorbell index - */ -static inline unsigned int -arbel_send_doorbell_idx ( unsigned int qpn_offset ) { - return ( ARBEL_MAX_CQS + qpn_offset ); -} - -/** - * Get receive work request doorbell index - * - * @v qpn_offset Queue pair number offset - * @ret doorbell_idx Doorbell index - */ -static inline unsigned int -arbel_recv_doorbell_idx ( unsigned int qpn_offset ) { - return ( ARBEL_MAX_DOORBELL_RECORDS - ARBEL_MAX_CQS - qpn_offset - 1 ); -} - -/** - * Get completion queue consumer counter doorbell index - * - * @v cqn_offset Completion queue number offset - * @ret doorbell_idx Doorbell index - */ -static inline unsigned int -arbel_cq_ci_doorbell_idx ( unsigned int cqn_offset ) { - return ( ARBEL_MAX_DOORBELL_RECORDS - cqn_offset - 1 ); -} - -#endif /* _ARBEL_H */ diff --git a/src/drivers/net/mlx_ipoib/bit_ops.h b/src/drivers/net/mlx_ipoib/bit_ops.h index 8b81bfcc..e3fb4331 100644 --- a/src/drivers/net/mlx_ipoib/bit_ops.h +++ b/src/drivers/net/mlx_ipoib/bit_ops.h @@ -80,14 +80,6 @@ struct addr_64_st { */ #define MT_EXTRACT_ARRAY32(A,O,S) MT_EXTRACT32(((__u32*)A)[O >> 5],(O & MASK32(5)),S) -/* - * MT_EXTRACT_ARRAY32_BE macro is similar to EXTRACT but works on an array of (__u32), - * thus offset may be larger than 32 (but not size). - * - * (added by mcb30) - */ -#define MT_EXTRACT_ARRAY32_BE(A,O,S) MT_EXTRACT32(be32_to_cpu(((__u32*)A)[O >> 5]),(O & MASK32(5)),S) - /* * MT_INSERT_ARRAY32 macro is similar to INSERT but works on an array of (__u32), * thus offset may be larger than 32 (but not size). @@ -98,8 +90,6 @@ struct addr_64_st { #define EX_FLD(a, st, fld) MT_EXTRACT_ARRAY32(a, MT_BIT_OFFSET(st, fld), MT_BIT_SIZE(st, fld)) -#define EX_FLD_BE(a, st, fld) MT_EXTRACT_ARRAY32_BE(a, MT_BIT_OFFSET(st, fld), MT_BIT_SIZE(st, fld)) - /* return the address of the dword holding the field buf = pointer to buffer where to place the value @@ -133,185 +123,4 @@ struct addr_64_st { field; \ }) - - -/* Remaining code Copyright Fen Systems Ltd. 2007 */ - -/** - * Wrapper structure for pseudo_bit_t structures - * - * This structure provides a wrapper around the autogenerated - * pseudo_bit_t structures. It has the correct size, and also - * encapsulates type information about the underlying pseudo_bit_t - * structure, which allows the MLX_FILL etc. macros to work without - * requiring explicit type information. 
- */ -#define MLX_DECLARE_STRUCT( _structure ) \ - _structure { \ - union { \ - uint8_t bytes[ sizeof ( struct _structure ## _st ) / 8 ]; \ - uint32_t dwords[ sizeof ( struct _structure ## _st ) / 32 ]; \ - struct _structure ## _st *dummy[0]; \ - } u; \ - } - -/** Get pseudo_bit_t structure type from wrapper structure pointer */ -#define MLX_PSEUDO_STRUCT( _ptr ) \ - typeof ( *((_ptr)->u.dummy[0]) ) - -/** Bit offset of a field within a pseudo_bit_t structure */ -#define MLX_BIT_OFFSET( _structure_st, _field ) \ - offsetof ( _structure_st, _field ) - -/** Dword offset of a field within a pseudo_bit_t structure */ -#define MLX_DWORD_OFFSET( _structure_st, _field ) \ - ( MLX_BIT_OFFSET ( _structure_st, _field ) / 32 ) - -/** Dword bit offset of a field within a pseudo_bit_t structure - * - * Yes, using mod-32 would work, but would lose the check for the - * error of specifying a mismatched field name and dword index. - */ -#define MLX_DWORD_BIT_OFFSET( _structure_st, _index, _field ) \ - ( MLX_BIT_OFFSET ( _structure_st, _field ) - ( 32 * (_index) ) ) - -/** Bit width of a field within a pseudo_bit_t structure */ -#define MLX_BIT_WIDTH( _structure_st, _field ) \ - sizeof ( ( ( _structure_st * ) NULL )->_field ) - -/** Bit mask for a field within a pseudo_bit_t structure */ -#define MLX_BIT_MASK( _structure_st, _field ) \ - ( ( ~( ( uint32_t ) 0 ) ) >> \ - ( 32 - MLX_BIT_WIDTH ( _structure_st, _field ) ) ) - -/* - * Assemble native-endian dword from named fields and values - * - */ - -#define MLX_ASSEMBLE_1( _structure_st, _index, _field, _value ) \ - ( (_value) << MLX_DWORD_BIT_OFFSET ( _structure_st, _index, _field ) ) - -#define MLX_ASSEMBLE_2( _structure_st, _index, _field, _value, ... ) \ - ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ - MLX_ASSEMBLE_1 ( _structure_st, _index, __VA_ARGS__ ) ) - -#define MLX_ASSEMBLE_3( _structure_st, _index, _field, _value, ... ) \ - ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ - MLX_ASSEMBLE_2 ( _structure_st, _index, __VA_ARGS__ ) ) - -#define MLX_ASSEMBLE_4( _structure_st, _index, _field, _value, ... ) \ - ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ - MLX_ASSEMBLE_3 ( _structure_st, _index, __VA_ARGS__ ) ) - -#define MLX_ASSEMBLE_5( _structure_st, _index, _field, _value, ... ) \ - ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ - MLX_ASSEMBLE_4 ( _structure_st, _index, __VA_ARGS__ ) ) - -#define MLX_ASSEMBLE_6( _structure_st, _index, _field, _value, ... ) \ - ( MLX_ASSEMBLE_1 ( _structure_st, _index, _field, _value ) | \ - MLX_ASSEMBLE_5 ( _structure_st, _index, __VA_ARGS__ ) ) - -/* - * Build native-endian (positive) dword bitmasks from named fields - * - */ - -#define MLX_MASK_1( _structure_st, _index, _field ) \ - ( MLX_BIT_MASK ( _structure_st, _field ) << \ - MLX_DWORD_BIT_OFFSET ( _structure_st, _index, _field ) ) - -#define MLX_MASK_2( _structure_st, _index, _field, ... ) \ - ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ - MLX_MASK_1 ( _structure_st, _index, __VA_ARGS__ ) ) - -#define MLX_MASK_3( _structure_st, _index, _field, ... ) \ - ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ - MLX_MASK_2 ( _structure_st, _index, __VA_ARGS__ ) ) - -#define MLX_MASK_4( _structure_st, _index, _field, ... ) \ - ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ - MLX_MASK_3 ( _structure_st, _index, __VA_ARGS__ ) ) - -#define MLX_MASK_5( _structure_st, _index, _field, ... 
) \ - ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ - MLX_MASK_4 ( _structure_st, _index, __VA_ARGS__ ) ) - -#define MLX_MASK_6( _structure_st, _index, _field, ... ) \ - ( MLX_MASK_1 ( _structure_st, _index, _field ) | \ - MLX_MASK_5 ( _structure_st, _index, __VA_ARGS__ ) ) - -/* - * Populate big-endian dwords from named fields and values - * - */ - -#define MLX_FILL( _ptr, _index, _assembled ) \ - do { \ - uint32_t *__ptr = &(_ptr)->u.dwords[(_index)]; \ - uint32_t __assembled = (_assembled); \ - *__ptr = cpu_to_be32 ( __assembled ); \ - } while ( 0 ) - -#define MLX_FILL_1( _ptr, _index, ... ) \ - MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_1 ( MLX_PSEUDO_STRUCT ( _ptr ),\ - _index, __VA_ARGS__ ) ) - -#define MLX_FILL_2( _ptr, _index, ... ) \ - MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_2 ( MLX_PSEUDO_STRUCT ( _ptr ),\ - _index, __VA_ARGS__ ) ) - -#define MLX_FILL_3( _ptr, _index, ... ) \ - MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_3 ( MLX_PSEUDO_STRUCT ( _ptr ),\ - _index, __VA_ARGS__ ) ) - -#define MLX_FILL_4( _ptr, _index, ... ) \ - MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_4 ( MLX_PSEUDO_STRUCT ( _ptr ),\ - _index, __VA_ARGS__ ) ) - -#define MLX_FILL_5( _ptr, _index, ... ) \ - MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_5 ( MLX_PSEUDO_STRUCT ( _ptr ),\ - _index, __VA_ARGS__ ) ) - -#define MLX_FILL_6( _ptr, _index, ... ) \ - MLX_FILL ( _ptr, _index, MLX_ASSEMBLE_6 ( MLX_PSEUDO_STRUCT ( _ptr ),\ - _index, __VA_ARGS__ ) ) - -/* - * Modify big-endian dword using named field and value - * - */ - -#define MLX_SET( _ptr, _field, _value ) \ - do { \ - unsigned int __index = \ - MLX_DWORD_OFFSET ( MLX_PSEUDO_STRUCT ( _ptr ), _field ); \ - uint32_t *__ptr = &(_ptr)->u.dwords[__index]; \ - uint32_t __value = be32_to_cpu ( *__ptr ); \ - __value &= ~( MLX_MASK_1 ( MLX_PSEUDO_STRUCT ( _ptr ), \ - __index, _field ) ); \ - __value |= MLX_ASSEMBLE_1 ( MLX_PSEUDO_STRUCT ( _ptr ), \ - __index, _field, _value ); \ - *__ptr = cpu_to_be32 ( __value ); \ - } while ( 0 ) - -/* - * Extract value of named field - * - */ - -#define MLX_GET( _ptr, _field ) \ - ( { \ - unsigned int __index = \ - MLX_DWORD_OFFSET ( MLX_PSEUDO_STRUCT ( _ptr ), _field ); \ - uint32_t *__ptr = &(_ptr)->u.dwords[__index]; \ - uint32_t __value = be32_to_cpu ( *__ptr ); \ - __value >>= \ - MLX_DWORD_BIT_OFFSET ( MLX_PSEUDO_STRUCT ( _ptr ), \ - __index, _field ); \ - __value &= \ - MLX_BIT_MASK ( MLX_PSEUDO_STRUCT ( _ptr ), _field ); \ - __value; \ - } ) - #endif /* __bit_ops_h__ */ diff --git a/src/drivers/net/mlx_ipoib/cmdif_comm.c b/src/drivers/net/mlx_ipoib/cmdif_comm.c index 363e4209..d43a1068 100644 --- a/src/drivers/net/mlx_ipoib/cmdif_comm.c +++ b/src/drivers/net/mlx_ipoib/cmdif_comm.c @@ -112,8 +112,6 @@ static XHH_cmd_status_t cmd_invoke(command_fields_t * cmd_prms) __u32 hcr[7], data; __u8 status; - DBG ( "Executing command:\n" ); - /* check if go bit is free */ ret = cmdif_is_free(&is_free); if (ret) { @@ -131,15 +129,6 @@ static XHH_cmd_status_t cmd_invoke(command_fields_t * cmd_prms) edit_hcr(cmd_prms, hcr); __asm__ __volatile__("":::"memory"); - DBG_HD ( &hcr[0], sizeof ( hcr ) ); - if ( cmd_prms->in_trans == TRANS_MAILBOX ) { - size_t size = ( 4 * cmd_prms->in_param_size ); - if ( size > 512 ) - size = 512; - DBG2 ( "Input mailbox:\n" ); - DBG2_HD ( &cmd_prms->in_param[0], size ); - } - for (i = 0; i < 7; ++i) { ret = gw_write_cr(HCR_BASE + i * 4, hcr[i]); if (ret) { @@ -170,14 +159,6 @@ static XHH_cmd_status_t cmd_invoke(command_fields_t * cmd_prms) return status; } - if ( cmd_prms->out_trans == TRANS_MAILBOX ) { - size_t size = ( 4 * 
cmd_prms->out_param_size ); - if ( size > 512 ) - size = 512; - DBG2 ( "Output mailbox:\n" ); - DBG2_HD ( &cmd_prms->out_param[0], size ); - } - if (cmd_prms->out_trans == TRANS_MAILBOX) be_to_cpu_buf(cmd_prms->out_param, cmd_prms->out_param_size); else if (cmd_prms->out_trans == TRANS_IMMEDIATE) { @@ -187,8 +168,6 @@ static XHH_cmd_status_t cmd_invoke(command_fields_t * cmd_prms) return -1; } - DBG ( "Command executed successfully\n" ); - return 0; } @@ -563,10 +542,7 @@ static int cmd_mgid_hash(__u8 * gid, __u16 * mgid_hash_p) { int rc; command_fields_t cmd_desc; - union { - __u32 u32; - __u16 u16[2]; - } result; + __u16 result[2]; memset(&cmd_desc, 0, sizeof cmd_desc); @@ -578,9 +554,9 @@ static int cmd_mgid_hash(__u8 * gid, __u16 * mgid_hash_p) rc = cmd_invoke(&cmd_desc); if (!rc) { - rc = gw_read_cr(HCR_BASE + 16, &result.u32); + rc = gw_read_cr(HCR_BASE + 16, (__u32 *) result); if (!rc) { - *mgid_hash_p = result.u16[0]; + *mgid_hash_p = result[0]; } } diff --git a/src/drivers/net/mlx_ipoib/cmdif_mt25218.c b/src/drivers/net/mlx_ipoib/cmdif_mt25218.c index 704fb1fc..fb95edbe 100644 --- a/src/drivers/net/mlx_ipoib/cmdif_mt25218.c +++ b/src/drivers/net/mlx_ipoib/cmdif_mt25218.c @@ -22,6 +22,14 @@ #include "cmdif_priv.h" #include "mt25218.h" +/* + * cmd_sys_dis + */ +static int cmd_sys_dis(void) +{ + return 0; +} + /* * cmd_write_mgm */ @@ -317,24 +325,6 @@ static int cmd_map_icm_aux(struct map_icm_st *map_icm_aux_p) return rc; } - -/* - * cmd_unmap_icm_aux - */ -static int cmd_unmap_icm_aux(void) -{ - int rc; - command_fields_t cmd_desc; - - memset(&cmd_desc, 0, sizeof cmd_desc); - - cmd_desc.opcode = MEMFREE_CMD_UNMAP_ICM_AUX; - - rc = cmd_invoke(&cmd_desc); - - return rc; -} - /* * cmd_map_icm */ @@ -381,31 +371,6 @@ static int cmd_map_icm(struct map_icm_st *map_icm_p) return rc; } - - -/* - * cmd_unmap_icm - */ -static int cmd_unmap_icm(struct map_icm_st *map_icm_p) -{ - int rc; - command_fields_t cmd_desc; - __u32 iprm[2]; - - memset(&cmd_desc, 0, sizeof cmd_desc); - - cmd_desc.opcode = MEMFREE_CMD_UNMAP_ICM; - iprm[0] = map_icm_p->vpm_arr[0].va_h; - iprm[1] = map_icm_p->vpm_arr[0].va_l; - cmd_desc.in_param = iprm; - cmd_desc.in_trans = TRANS_IMMEDIATE; - cmd_desc.input_modifier = 1 << map_icm_p->vpm_arr[0].log2_size; - - rc = cmd_invoke(&cmd_desc); - - return rc; -} - /* * cmd_query_dev_lim */ diff --git a/src/drivers/net/mlx_ipoib/doc/README.boot_over_ib b/src/drivers/net/mlx_ipoib/doc/README.boot_over_ib index 062abd3f..07738628 100644 --- a/src/drivers/net/mlx_ipoib/doc/README.boot_over_ib +++ b/src/drivers/net/mlx_ipoib/doc/README.boot_over_ib @@ -92,11 +92,13 @@ files with .mlx extension also available from Mellanox's web site. 6. Preparing the DHCP Server ----------------------------- -The DHCP server may need to be modified in order to work on IPOIB. Some -distributuions alreay support this (Some SUSE distributuions) while others -do not. If the pre-installed server does not support IPOIB, the user can download -the sources from ISC http://www.isc.org/ and apply the appropriate patch in -the patches directory. +DHCP messages over IP Over IB are transmitted as broadcasts. In order to +distinguish between messages belonging to a certain DHCP session, the messages +must carry the client identifier option (see ietf documentation referred to +above). As of November 2005, ISC DHCP servers do not support this feature. +They are expected to support this at the end of 2005. In order to work this +out, the appropriate patch must be applied (see patches directory). 
It has +been tested on version isc-dhcpd-V3.0.4b2. The DHCP server must run on a machine which supports IP Over IB. The Mellanox IBGD package (gen1 or gen2) can be used to provide this. @@ -169,14 +171,6 @@ PXE_IB_PORT. 14. Installing a package from Mellanox -------------------------------------- -The package comes as a compressed file with extension .bz2 or .gz. Follow -these steps: -1. Create a directory -2. cd to this directory -3. tar jxf for .bz2 files or - tar zxf for .gz files - -The binaries can be found under src/bin When using a package obtained from Mellanox Technologies' web site, the directory src/bin will contain the driver binary files. The files have a .bin extension and are equivalent to the same files with .zrom extension. diff --git a/src/drivers/net/mlx_ipoib/ib_driver.c b/src/drivers/net/mlx_ipoib/ib_driver.c index 34d4cbaa..a46db7fc 100644 --- a/src/drivers/net/mlx_ipoib/ib_driver.c +++ b/src/drivers/net/mlx_ipoib/ib_driver.c @@ -62,9 +62,6 @@ static int wait_logic_link_up(__u8 port) return 0; } -unsigned long ipoib_qkey; -unsigned long hack_ipoib_qkey; - static int ib_driver_init(struct pci_device *pci, udqp_t * ipoib_qph_p) { int rc; @@ -150,9 +147,6 @@ static int ib_driver_init(struct pci_device *pci, udqp_t * ipoib_qph_p) qkey, mlid); } - hack_ipoib_qkey = ipoib_qkey = qkey; - -#if 0 rc = create_ipoib_qp(&ib_data.ipoib_qp, &ib_data.ipoib_snd_cq, &ib_data.ipoib_rcv_cq, qkey); @@ -172,7 +166,6 @@ static int ib_driver_init(struct pci_device *pci, udqp_t * ipoib_qph_p) } else { tprintf("add_qp_to_mcast_group() success"); } -#endif /* create a broadcast group ud AV */ av = alloc_ud_av(); @@ -185,19 +178,6 @@ static int ib_driver_init(struct pci_device *pci, udqp_t * ipoib_qph_p) tprintf("modify_av_params() success"); ib_data.bcast_av = av; -#if ! 
CREATE_OWN - rc = create_ipoib_qp(&ib_data.ipoib_qp, - &ib_data.ipoib_snd_cq, - &ib_data.ipoib_rcv_cq, qkey); - if (rc) { - eprintf(""); - return rc; - } - - tprintf("create_ipoib_qp() success"); - *ipoib_qph_p = ib_data.ipoib_qp; -#endif - do { rc = poll_eq(&ib_eqe, &num_eqe); if (rc) { @@ -268,10 +248,12 @@ static int ib_driver_close(int fw_fatal) ret = 1; } - rc = unset_hca(); - if (rc) { - eprintf(""); - ret = 1; + if (!fw_fatal) { + rc = cmd_sys_dis(); + if (rc) { + eprintf(""); + ret = 1; + } } return ret; @@ -286,7 +268,7 @@ static int poll_cqe_tout(cq_t cqh, __u16 tout, void **wqe, int *good_p) end = currticks() + tout; do { - rc = ib_poll_cqx(cqh, &ib_cqe, &num_cqes); + rc = ib_poll_cq(cqh, &ib_cqe, &num_cqes); if (rc) return rc; diff --git a/src/drivers/net/mlx_ipoib/ib_driver.h b/src/drivers/net/mlx_ipoib/ib_driver.h index 7fc57364..305bb5d4 100644 --- a/src/drivers/net/mlx_ipoib/ib_driver.h +++ b/src/drivers/net/mlx_ipoib/ib_driver.h @@ -49,17 +49,17 @@ #define QPN_BASE 0x550000 enum { + MADS_QPN_SN, IPOIB_QPN_SN, - MADS_QPN_SN = 4, - MAX_APP_QPS = 8 + MAX_APP_QPS }; enum { + MADS_SND_CQN_SN, + MADS_RCV_CQN_SN, IPOIB_SND_CQN_SN, IPOIB_RCV_CQN_SN, - MADS_SND_CQN_SN = 4, - MADS_RCV_CQN_SN, - MAX_APP_CQS = 8 + MAX_APP_CQS }; enum { @@ -153,7 +153,7 @@ static int gw_read_cr(__u32 addr, __u32 * result); static int gw_write_cr(__u32 addr, __u32 data); static ud_av_t alloc_ud_av(void); static void free_ud_av(ud_av_t av); -static int ib_poll_cqx(cq_t cq, struct ib_cqe_st *ib_cqe_p, __u8 * num_cqes); +static int ib_poll_cq(cq_t cq, struct ib_cqe_st *ib_cqe_p, __u8 * num_cqes); static int add_qp_to_mcast_group(union ib_gid_u mcast_gid, __u8 add); static int clear_interrupt(void); static int poll_cqe_tout(cq_t cqh, __u16 tout, void **wqe, int *good_p); diff --git a/src/drivers/net/mlx_ipoib/ib_mad.c b/src/drivers/net/mlx_ipoib/ib_mad.c index 4da4677b..3e263a5b 100644 --- a/src/drivers/net/mlx_ipoib/ib_mad.c +++ b/src/drivers/net/mlx_ipoib/ib_mad.c @@ -158,7 +158,7 @@ static int join_mc_group(__u32 * qkey_p, __u16 * mlid_p, __u8 join) eprintf(""); return -1; } - tprintf("allocated snd_wqe=%p", snd_wqe); + tprintf("allocated snd_wqe=0x%lx", snd_wqe); mad = get_send_wqe_buf(snd_wqe, 0); memset(mad, 0, 256); @@ -264,7 +264,7 @@ static int join_mc_group(__u32 * qkey_p, __u16 * mlid_p, __u8 join) return is_good ? 
0 : -1; } -int get_path_record(union ib_gid_u *dgid, __u16 * dlid_p, u8 * sl_p, +static int get_path_record(union ib_gid_u *dgid, __u16 * dlid_p, u8 * sl_p, u8 * rate_p) { struct path_record_mad_st *mad, *rcv_mad; @@ -321,9 +321,6 @@ int get_path_record(union ib_gid_u *dgid, __u16 * dlid_p, u8 * sl_p, cpu_to_be_buf(mad, sizeof *mad); memcpy(mad->path_record.sgid.raw, ib_data.port_gid.raw, 16); - DBG ( "data:\n" ); - DBG_HD ( mad, sizeof ( *mad ) ); - rc = post_send_req(qp, snd_wqe, 1); if (rc) { eprintf(""); diff --git a/src/drivers/net/mlx_ipoib/ib_mad.h b/src/drivers/net/mlx_ipoib/ib_mad.h index 51b90d21..5ffb5404 100644 --- a/src/drivers/net/mlx_ipoib/ib_mad.h +++ b/src/drivers/net/mlx_ipoib/ib_mad.h @@ -104,7 +104,7 @@ union mad_u { struct ib_mad_st mad; } __attribute__ ((packed)); -int get_path_record(union ib_gid_u *dgid, __u16 * dlid_p, __u8 * sl_p, +static int get_path_record(union ib_gid_u *dgid, __u16 * dlid_p, __u8 * sl_p, __u8 * rate_p); #endif /* __ib_mad_h__ */ diff --git a/src/drivers/net/mlx_ipoib/ib_mt23108.c b/src/drivers/net/mlx_ipoib/ib_mt23108.c index d9261b9c..ca3abb10 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt23108.c +++ b/src/drivers/net/mlx_ipoib/ib_mt23108.c @@ -92,12 +92,7 @@ static int find_mlx_bridge(__u8 hca_bus, __u8 * br_bus_p, __u8 * br_devfn_p) for (bus = 0; bus < 256; ++bus) { for (dev = 0; dev < 32; ++dev) { devfn = (dev << 3); - - struct pci_device tmp; - tmp.bus = bus; - tmp.devfn = devfn; - - rc = pcibios_read_config_word(&tmp, PCI_VENDOR_ID, + rc = pcibios_read_config_word(bus, devfn, PCI_VENDOR_ID, &vendor); if (rc) return rc; @@ -105,7 +100,7 @@ static int find_mlx_bridge(__u8 hca_bus, __u8 * br_bus_p, __u8 * br_devfn_p) if (vendor != MELLANOX_VENDOR_ID) continue; - rc = pcibios_read_config_word(&tmp, PCI_DEVICE_ID, + rc = pcibios_read_config_word(bus, devfn, PCI_DEVICE_ID, &dev_id); if (rc) return rc; @@ -113,7 +108,7 @@ static int find_mlx_bridge(__u8 hca_bus, __u8 * br_bus_p, __u8 * br_devfn_p) if (dev_id != TAVOR_BRIDGE_DEVICE_ID) continue; - rc = pcibios_read_config_byte(&tmp, + rc = pcibios_read_config_byte(bus, devfn, PCI_SECONDARY_BUS, &sec_bus); if (rc) @@ -166,7 +161,7 @@ static int ib_device_init(struct pci_device *dev) tavor_pci_dev.dev.dev = dev; tprintf(""); - if (dev->device == TAVOR_DEVICE_ID) { + if (dev->dev_id == TAVOR_DEVICE_ID) { rc = find_mlx_bridge(dev->bus, &br_bus, &br_devfn); if (rc) { @@ -180,12 +175,7 @@ static int ib_device_init(struct pci_device *dev) tprintf("bus=%d devfn=0x%x", br_bus, br_devfn); /* save config space */ for (i = 0; i < 64; ++i) { - - struct pci_device tmp; - tmp.bus = br_bus; - tmp.devfn = br_devfn; - - rc = pcibios_read_config_dword(&tmp, i << 2, + rc = pcibios_read_config_dword(br_bus, br_devfn, i << 2, &tavor_pci_dev.br. 
dev_config_space[i]); if (rc) { @@ -213,7 +203,7 @@ static int ib_device_init(struct pci_device *dev) eprintf(""); return -1; } - tprintf("uar_base (pa:va) = 0x%lx %p", + tprintf("uar_base (pa:va) = 0x%lx 0x%lx", tavor_pci_dev.dev.bar[2] + UAR_IDX * 0x1000, tavor_pci_dev.uar); tprintf(""); @@ -235,7 +225,7 @@ static int init_dev_data(void) dev_buffers_p = bus_to_virt(tmp); memreg_size = (__u32) (&memreg_size) - (__u32) dev_buffers_p; - tprintf("src_buf=%p, dev_buffers_p=%p, memreg_size=0x%lx", src_buf, + tprintf("src_buf=0x%lx, dev_buffers_p=0x%lx, memreg_size=0x%x", src_buf, dev_buffers_p, memreg_size); return 0; @@ -246,14 +236,10 @@ static int restore_config(void) int i; int rc; - if (tavor_pci_dev.dev.dev->device == TAVOR_DEVICE_ID) { + if (tavor_pci_dev.dev.dev->dev_id == TAVOR_DEVICE_ID) { for (i = 0; i < 64; ++i) { - - struct pci_device tmp; - tmp.bus = tavor_pci_dev.br.bus; - tmp.devfn = tavor_pci_dev.br.devfn; - - rc = pcibios_write_config_dword(&tmp, + rc = pcibios_write_config_dword(tavor_pci_dev.br.bus, + tavor_pci_dev.br.devfn, i << 2, tavor_pci_dev.br. dev_config_space[i]); @@ -565,9 +551,9 @@ static int setup_hca(__u8 port, void **eq_p) tprintf("fw_rev_major=%d", qfw.fw_rev_major); tprintf("fw_rev_minor=%d", qfw.fw_rev_minor); tprintf("fw_rev_subminor=%d", qfw.fw_rev_subminor); - tprintf("error_buf_start_h=0x%lx", qfw.error_buf_start_h); - tprintf("error_buf_start_l=0x%lx", qfw.error_buf_start_l); - tprintf("error_buf_size=%ld", qfw.error_buf_size); + tprintf("error_buf_start_h=0x%x", qfw.error_buf_start_h); + tprintf("error_buf_start_l=0x%x", qfw.error_buf_start_l); + tprintf("error_buf_size=%d", qfw.error_buf_size); } if (qfw.error_buf_start_h) { @@ -813,20 +799,6 @@ static int setup_hca(__u8 port, void **eq_p) return ret; } - -static int unset_hca(void) -{ - int rc = 0; - - if (!fw_fatal) { - rc = cmd_sys_dis(); - if (rc) - eprintf(""); - } - - return rc; -} - static void *get_inprm_buf(void) { return dev_buffers_p->inprm_buf; @@ -958,7 +930,7 @@ static int post_send_req(void *qph, void *wqeh, __u8 num_gather) __u32 *psrc, *pdst; __u32 nds; - tprintf("snd_wqe=%p, virt_to_bus(snd_wqe)=0x%lx", snd_wqe, + tprintf("snd_wqe=0x%lx, virt_to_bus(snd_wqe)=0x%lx", snd_wqe, virt_to_bus(snd_wqe)); memset(&dbell, 0, sizeof dbell); @@ -1082,7 +1054,7 @@ static int create_ipoib_qp(void **qp_pp, /* update data */ qp->rcv_wq[i].wqe_cont.qp = qp; qp->rcv_bufs[i] = ib_buffers.ipoib_rcv_buf[i]; - tprintf("rcv_buf=%p", qp->rcv_bufs[i]); + tprintf("rcv_buf=%lx", qp->rcv_bufs[i]); } /* init send queue WQEs list */ @@ -1415,7 +1387,7 @@ static int ib_poll_cq(void *cqh, struct ib_cqe_st *ib_cqe_p, u8 * num_cqes) eprintf("syndrome=0x%lx", EX_FLD(cqe.error_cqe, tavorprm_completion_with_error_st, syndrome)); - eprintf("wqe_addr=%p", wqe_p); + eprintf("wqe_addr=0x%lx", wqe_p); eprintf("wqe_size=0x%lx", EX_FLD(cqe.error_cqe, tavorprm_completion_with_error_st, wqe_size)); @@ -1544,7 +1516,7 @@ static struct recv_wqe_st *alloc_rcv_wqe(struct udqp_st *qp) wqe->mpointer[1].lkey = dev_ib_data.mkey; wqe->mpointer[1].byte_count = qp->rcv_buf_sz; - tprintf("rcv_buf=%p\n", qp->rcv_bufs[new_entry]); + tprintf("rcv_buf=%lx\n", qp->rcv_bufs[new_entry]); /* we do it only on the data segment since the control segment is always owned by HW */ @@ -1671,7 +1643,7 @@ static int poll_eq(struct ib_eqe_st *ib_eqe_p, __u8 * num_eqes) struct eq_st *eq = &dev_ib_data.eq; ptr = (__u32 *) (&(eq->eq_buf[eq->cons_idx])); - tprintf("cons)idx=%ld, addr(eqe)=%lx, val=0x%lx", eq->cons_idx, virt_to_bus(ptr), ptr[7]); + 
tprintf("cons)idx=%d, addr(eqe)=%x, val=0x%x", eq->cons_idx, virt_to_bus(ptr), ptr[7]); owner = (ptr[7] & 0x80000000) ? OWNER_HW : OWNER_SW; if (owner == OWNER_SW) { tprintf("got eqe"); diff --git a/src/drivers/net/mlx_ipoib/ib_mt25218.c b/src/drivers/net/mlx_ipoib/ib_mt25218.c index 174a2309..f16577f1 100644 --- a/src/drivers/net/mlx_ipoib/ib_mt25218.c +++ b/src/drivers/net/mlx_ipoib/ib_mt25218.c @@ -89,10 +89,6 @@ static struct dev_pci_struct memfree_pci_dev; static struct device_buffers_st *dev_buffers_p; static struct device_ib_data_st dev_ib_data; - - -struct map_icm_st icm_map_obj; - static int gw_write_cr(__u32 addr, __u32 data) { writel(htonl(data), memfree_pci_dev.cr_space + addr); @@ -159,7 +155,7 @@ static int ib_device_init(struct pci_device *dev) eprintf(""); return -1; } - tprintf("uar_base (pa:va) = 0x%lx %p", + tprintf("uar_base (pa:va) = 0x%lx 0x%lx", memfree_pci_dev.dev.bar[2] + UAR_IDX * 0x1000, memfree_pci_dev.uar); @@ -174,8 +170,6 @@ static inline unsigned long lalign(unsigned long buf, unsigned long align) (~(((unsigned long)align) - 1))); } -#include - static int init_dev_data(void) { unsigned long tmp; @@ -185,21 +179,17 @@ static int init_dev_data(void) dev_buffers_p = bus_to_virt(tmp); memreg_size = (__u32) (&memreg_size) - (__u32) dev_buffers_p; - tprintf("src_buf=%p, dev_buffers_p=%p, memreg_size=0x%lx", src_buf, + tprintf("src_buf=0x%lx, dev_buffers_p=0x%lx, memreg_size=0x%x", src_buf, dev_buffers_p, memreg_size); - tprintf("inprm: va=%p, pa=0x%lx", dev_buffers_p->inprm_buf, + tprintf("inprm: va=0x%lx, pa=0x%lx", dev_buffers_p->inprm_buf, virt_to_bus(dev_buffers_p->inprm_buf)); - tprintf("outprm: va=%p, pa=0x%lx", dev_buffers_p->outprm_buf, + tprintf("outprm: va=0x%lx, pa=0x%lx", dev_buffers_p->outprm_buf, virt_to_bus(dev_buffers_p->outprm_buf)); - userptr_t lotsofmem = umalloc ( reserve_size * 2 ); - if ( ! 
lotsofmem ) { - printf ( "Could not allocate large memblock\n" ); - return -1; - } - phys_mem.base = ( ( user_to_phys ( lotsofmem, 0 ) + reserve_size ) & - ~( reserve_size - 1 ) ); + phys_mem.base = + (virt_to_phys(_text) - reserve_size) & (~(reserve_size - 1)); + phys_mem.offset = 0; return 0; @@ -333,13 +323,9 @@ static void prep_sw2hw_mpt_buf(void *buf, __u32 mkey) INS_FLD(1, buf, arbelprm_mpt_st, r_w); INS_FLD(mkey, buf, arbelprm_mpt_st, mem_key); INS_FLD(GLOBAL_PD, buf, arbelprm_mpt_st, pd); - // INS_FLD(virt_to_bus(dev_buffers_p), buf, arbelprm_mpt_st, - // start_address_l); - // INS_FLD(memreg_size, buf, arbelprm_mpt_st, reg_wnd_len_l); - INS_FLD(0, buf, arbelprm_mpt_st, start_address_l); - INS_FLD(0, buf, arbelprm_mpt_st, start_address_h); - INS_FLD(0xffffffffUL, buf, arbelprm_mpt_st, reg_wnd_len_l); - INS_FLD(0xffffffffUL, buf, arbelprm_mpt_st, reg_wnd_len_h); + INS_FLD(virt_to_bus(dev_buffers_p), buf, arbelprm_mpt_st, + start_address_l); + INS_FLD(memreg_size, buf, arbelprm_mpt_st, reg_wnd_len_l); } static void prep_sw2hw_eq_buf(void *buf, struct eqe_t *eq_buf) @@ -675,9 +661,9 @@ static int setup_hca(__u8 port, void **eq_p) tprintf("fw_rev_major=%d", qfw.fw_rev_major); tprintf("fw_rev_minor=%d", qfw.fw_rev_minor); tprintf("fw_rev_subminor=%d", qfw.fw_rev_subminor); - tprintf("error_buf_start_h=0x%lx", qfw.error_buf_start_h); - tprintf("error_buf_start_l=0x%lx", qfw.error_buf_start_l); - tprintf("error_buf_size=%ld", qfw.error_buf_size); + tprintf("error_buf_start_h=0x%x", qfw.error_buf_start_h); + tprintf("error_buf_start_l=0x%x", qfw.error_buf_start_l); + tprintf("error_buf_size=%d", qfw.error_buf_size); } @@ -758,20 +744,17 @@ static int setup_hca(__u8 port, void **eq_p) tmp = get_req_icm_pages(dev_lim.log2_rsvd_qps, MAX_APP_QPS, dev_lim.qpc_entry_sz, &log2_entries); - DBG ( "qpc_base_addr_l = %lx\n", icm_start ); init_hca.qpc_base_addr_l = icm_start; init_hca.log_num_of_qp = log2_entries; icm_start += (tmp << 12); icm_size += (tmp << 12); - DBG ( "eqpc_base_addr_l = %lx\n", icm_start ); init_hca.eqpc_base_addr_l = icm_start; icm_start += (tmp << 12); icm_size += (tmp << 12); tmp = get_req_icm_pages(dev_lim.log2_rsvd_srqs, 0, dev_lim.srq_entry_sz, &log2_entries); - DBG ( "srqc_base_addr_l = %lx\n", icm_start ); init_hca.srqc_base_addr_l = icm_start; init_hca.log_num_of_srq = log2_entries; icm_start += (tmp << 12); @@ -779,18 +762,15 @@ static int setup_hca(__u8 port, void **eq_p) tmp = get_req_icm_pages(dev_lim.log2_rsvd_ees, 0, dev_lim.eec_entry_sz, &log2_entries); - DBG ( "eec_base_addr_l = %lx\n", icm_start ); init_hca.eec_base_addr_l = icm_start; init_hca.log_num_of_ee = log2_entries; icm_start += (tmp << 12); icm_size += (tmp << 12); - DBG ( "eeec_base_addr_l = %lx\n", icm_start ); init_hca.eeec_base_addr_l = icm_start; icm_start += (tmp << 12); icm_size += (tmp << 12); - DBG ( "cqc_base_addr_l = %lx\n", icm_start ); tmp = get_req_icm_pages(dev_lim.log2_rsvd_cqs, MAX_APP_CQS, dev_lim.cqc_entry_sz, &log2_entries); @@ -801,35 +781,29 @@ static int setup_hca(__u8 port, void **eq_p) tmp = get_req_icm_pages(dev_lim.log2_rsvd_mtts, 0, dev_lim.mtt_entry_sz, &log2_entries); - DBG ( "mtt_base_addr_l = %lx\n", icm_start ); init_hca.mtt_base_addr_l = icm_start; icm_start += (tmp << 12); icm_size += (tmp << 12); tmp = get_req_icm_pages(dev_lim.log2_rsvd_mrws, 1, dev_lim.mpt_entry_sz, &log2_entries); - DBG ( "mpt_base_addr_l = %lx\n", icm_start ); init_hca.mpt_base_addr_l = icm_start; init_hca.log_mpt_sz = log2_entries; - DBG ( "log2_entries for mpt = %ld\n", log2_entries ); icm_start 
+= (tmp << 12); icm_size += (tmp << 12); tmp = get_req_icm_pages(dev_lim.log2_rsvd_rdbs, 1, 32, /* size of rdb entry */ &log2_entries); - DBG ( "rdb_base_addr_l = %lx\n", icm_start ); init_hca.rdb_base_addr_l = icm_start; icm_start += (tmp << 12); icm_size += (tmp << 12); - DBG ( "eqc_base_addr_l = %lx\n", icm_start ); init_hca.eqc_base_addr_l = icm_start; init_hca.log_num_of_eq = LOG2_EQS; tmp = dev_lim.eqc_entry_sz * (1 << LOG2_EQS); icm_start += tmp; icm_size += tmp; - DBG ( "mc_base_addr_l = %lx\n", icm_start ); init_hca.mc_base_addr_l = icm_start; init_hca.log_mc_table_entry_sz = my_log2(MT_STRUCT_SIZE(arbelprm_mgm_entry_st)); @@ -840,8 +814,6 @@ static int setup_hca(__u8 port, void **eq_p) icm_start += (MT_STRUCT_SIZE(arbelprm_mgm_entry_st) * init_hca.mc_table_hash_sz); - DBG ( "icm_size = %lx\n", icm_size ); - rc = cmd_set_icm_size(icm_size, &aux_pages); if (rc) { ret = -1; @@ -864,7 +836,7 @@ static int setup_hca(__u8 port, void **eq_p) uar_context_pa = phys_mem.base + phys_mem.offset + dev_ib_data.uar_idx * 4096; uar_context_va = phys_to_virt(uar_context_pa); - tprintf("uar_context: va=%p, pa=0x%lx", uar_context_va, + tprintf("uar_context: va=0x%lx, pa=0x%lx", uar_context_va, uar_context_pa); dev_ib_data.uar_context_base = uar_context_va; @@ -878,12 +850,10 @@ static int setup_hca(__u8 port, void **eq_p) eprintf(""); goto undo_map_fa; } - icm_map_obj = map_obj; - phys_mem.offset += (1 << (map_obj.vpm_arr[0].log2_size + 12)); init_hca.log_max_uars = log_max_uars; - tprintf("inprm: va=%p, pa=0x%lx", inprm, virt_to_bus(inprm)); + tprintf("inprm: va=0x%lx, pa=0x%lx", inprm, virt_to_bus(inprm)); prep_init_hca_buf(&init_hca, inprm); rc = cmd_init_hca(inprm, MT_STRUCT_SIZE(arbelprm_init_hca_st)); if (rc) { @@ -1008,30 +978,6 @@ static int setup_hca(__u8 port, void **eq_p) return ret; } - -static int unset_hca(void) -{ - int rc, ret = 0; - - rc = cmd_unmap_icm(&icm_map_obj); - if (rc) - eprintf(""); - ret |= rc; - - - rc = cmd_unmap_icm_aux(); - if (rc) - eprintf(""); - ret |= rc; - - rc = cmd_unmap_fa(); - if (rc) - eprintf(""); - ret |= rc; - - return ret; -} - static void *get_inprm_buf(void) { return dev_buffers_p->inprm_buf; @@ -1140,14 +1086,9 @@ static int post_send_req(void *qph, void *wqeh, __u8 num_gather) struct send_doorbell_st dbell; __u32 nds; - DBG ( "Work queue entry:\n" ); - DBG_HD ( snd_wqe, sizeof ( *snd_wqe ) ); - qp->post_send_counter++; WRITE_WORD_VOL(qp->send_uar_context, 2, htons(qp->post_send_counter)); - DBG ( "Doorbell record:\n" ); - DBG_HD ( qp->send_uar_context, 8 ); memset(&dbell, 0, sizeof dbell); INS_FLD(XDEV_NOPCODE_SEND, &dbell, arbelprm_send_doorbell_st, nopcode); @@ -1171,10 +1112,6 @@ static int post_send_req(void *qph, void *wqeh, __u8 num_gather) INS_FLD_TO_BE(XDEV_NOPCODE_SEND, &qp->last_posted_snd_wqe->next.next, arbelprm_wqe_segment_next_st, nopcode); - - DBG ( "Previous work queue entry's next field:\n" ); - DBG_HD ( &qp->last_posted_snd_wqe->next.next, - sizeof ( qp->last_posted_snd_wqe->next.next ) ); } rc = cmd_post_doorbell(&dbell, POST_SND_OFFSET); @@ -1195,8 +1132,6 @@ static int create_mads_qp(void **qp_pp, void **snd_cq_pp, void **rcv_cq_pp) __u8 nds; void *ptr; - DBG ( "*** Creating MADS queue pair ***\n" ); - qp = &dev_ib_data.mads_qp; /* set the pointer to the receive WQEs buffer */ @@ -1307,8 +1242,6 @@ static int create_mads_qp(void **qp_pp, void **snd_cq_pp, void **rcv_cq_pp) *rcv_cq_pp = &qp->rcv_cq; } - DBG ( "*** Created MADS queue pair ***\n" ); - return rc; } @@ -1322,8 +1255,6 @@ static int create_ipoib_qp(void **qp_pp, __u8 
nds; void *ptr; - DBG ( "*** Creating IPoIB queue pair ***\n" ); - qp = &dev_ib_data.ipoib_qp; /* set the pointer to the receive WQEs buffer */ @@ -1333,7 +1264,7 @@ static int create_ipoib_qp(void **qp_pp, qp->rcv_buf_sz = IPOIB_RCV_BUF_SZ; qp->max_recv_wqes = NUM_IPOIB_RCV_WQES; - qp->recv_wqe_cur_free = 0; //NUM_IPOIB_RCV_WQES; + qp->recv_wqe_cur_free = NUM_IPOIB_RCV_WQES; qp->rcv_uar_context = dev_ib_data.uar_context_base + 8 * IPOIB_RCV_QP_DB_IDX; @@ -1429,8 +1360,6 @@ static int create_ipoib_qp(void **qp_pp, *rcv_cq_pp = &qp->rcv_cq; } - DBG ( "*** Created IPoIB queue pair ***\n" ); - return rc; } @@ -1451,8 +1380,6 @@ static int create_udqp(struct udqp_st *qp) qp->snd_cq.ci_db_ctx_pointer = dev_ib_data.uar_context_base + 8 * qp->snd_cq.ci_db_ctx_idx; - DBG ( "* Creating send CQ *\n" ); - /* create send CQ */ init_cq_buf(qp->snd_cq.cq_buf, qp->snd_cq.num_cqes); qp->snd_cq.cons_counter = 0; @@ -1469,8 +1396,6 @@ static int create_udqp(struct udqp_st *qp) goto exit; } - DBG ( "* Creating receive CQ *\n" ); - /* create receive CQ */ init_cq_buf(qp->rcv_cq.cq_buf, qp->rcv_cq.num_cqes); qp->rcv_cq.cons_counter = 0; @@ -1488,8 +1413,6 @@ static int create_udqp(struct udqp_st *qp) goto undo_snd_cq; } - DBG ( "* Creating QP *\n" ); - prep_rst2init_qpee_buf(inprm, qp->snd_cq.cqn, qp->rcv_cq.cqn, @@ -1627,15 +1550,6 @@ static void prep_send_wqe_buf(void *qph, len += offset; } snd_wqe->mpointer[0].byte_count = cpu_to_be32(len); - -#if 0 - DBG ( "prep_send_wqe_buf()\n" ); - DBG ( "snd_wqe:\n" ); - DBG_HD ( snd_wqe, sizeof ( *snd_wqe ) ); - DBG ( "packet:\n" ); - DBG_HD ( bus_to_virt(be32_to_cpu(snd_wqe->mpointer[0].local_addr_l)), - len ); -#endif } static void *alloc_ud_av(void) @@ -1744,7 +1658,7 @@ static void dev2ib_cqe(struct ib_cqe_st *ib_cqe_p, union cqe_st *cqe_p) byte_cnt); } -static int ib_poll_cqx(void *cqh, struct ib_cqe_st *ib_cqe_p, u8 * num_cqes) +static int ib_poll_cq(void *cqh, struct ib_cqe_st *ib_cqe_p, u8 * num_cqes) { int rc; union cqe_st cqe; @@ -1776,7 +1690,7 @@ static int ib_poll_cqx(void *cqh, struct ib_cqe_st *ib_cqe_p, u8 * num_cqes) eprintf("vendor_syndrome=0x%lx", EX_FLD(cqe.error_cqe, arbelprm_completion_with_error_st, vendor_code)); - eprintf("wqe_addr=%p", wqe_p); + eprintf("wqe_addr=0x%lx", wqe_p); eprintf("myqpn=0x%lx", EX_FLD(cqe.error_cqe, arbelprm_completion_with_error_st, myqpn)); @@ -2006,9 +1920,6 @@ static void dev_post_dbell(void *dbell, __u32 offset) address = (unsigned long)(memfree_pci_dev.uar) + offset; tprintf("va=0x%lx pa=0x%lx", address, virt_to_bus((const void *)address)); - DBG ( "dev_post_dbell %08lx:%08lx to %lx\n", - htonl ( ptr[0] ), htonl ( ptr[1] ), - virt_to_phys ( memfree_pci_dev.uar + offset ) ); writel(htonl(ptr[0]), memfree_pci_dev.uar + offset); barrier(); address += 4; diff --git a/src/drivers/net/mlx_ipoib/ipoib.c b/src/drivers/net/mlx_ipoib/ipoib.c index d8dd6bf6..85eaac7a 100644 --- a/src/drivers/net/mlx_ipoib/ipoib.c +++ b/src/drivers/net/mlx_ipoib/ipoib.c @@ -357,12 +357,52 @@ static void modify_dhcp_resp(void *buf, __u16 size) modify_udp_csum(buf, size); } +static void get_my_client_id(__u8 * my_client_id) +{ + + my_client_id[0] = 0; + qpn2buf(ipoib_data.ipoib_qpn, my_client_id + 1); + memcpy(my_client_id + 4, ipoib_data.port_gid_raw, 16); +} + +static const __u8 *get_client_id(const void *buf, int len) +{ + const __u8 *ptr; + int delta; + + if (len < 268) + return NULL; + + /* pointer to just after magic cookie */ + ptr = (const __u8 *)buf + 268; + + /* find last client identifier option */ + do { + if (ptr[0] == 255) { + /* 
found end of options list */ + return NULL; + } + + if (ptr[0] == 0x3d) { + /* client identifer option */ + return ptr + 3; + } + + delta = ptr[1] + 2; + ptr += delta; + len -= delta; + } while (len > 0); + + return NULL; +} + static int handle_ipv4_packet(void *buf, void **out_buf_p, unsigned int *new_size_p, int *is_bcast_p) { void *new_buf; __u16 new_size; __u8 msg_type; + __u8 my_client_id[20]; new_buf = (void *)(((__u8 *) buf) + 4); new_size = (*new_size_p) - 4; @@ -371,6 +411,7 @@ static int handle_ipv4_packet(void *buf, void **out_buf_p, if (get_ip_protocl(new_buf) == IP_PROT_UDP) { __u16 udp_dst_port; + const __u8 *client_id; udp_dst_port = get_udp_dst_port(new_buf); @@ -379,6 +420,22 @@ static int handle_ipv4_packet(void *buf, void **out_buf_p, *out_buf_p = 0; return 0; } + + if (udp_dst_port == 68) { + get_my_client_id(my_client_id); + + /* packet client id */ + client_id = get_client_id(new_buf, new_size); + if (!client_id) { + *out_buf_p = 0; + return 0; + } + + if (memcmp(client_id, my_client_id, 20)) { + *out_buf_p = 0; + return 0; + } + } } msg_type = get_dhcp_msg_type(new_buf); @@ -458,9 +515,8 @@ static int ipoib_handle_rcv(void *buf, void **out_buf_p, rc = handle_ipv4_packet(buf, out_buf_p, new_size_p, is_bcast_p); return rc; } - tprintf("prot=0x%x", prot_type); - *out_buf_p = NULL; - return 0; + eprintf("prot=0x%x", prot_type); + return -1; } static int is_null_mac(const __u8 * mac) @@ -879,7 +935,7 @@ static int ipoib_read_packet(__u16 * prot_p, void *data, unsigned int *size_p, void *buf, *out_buf; __u16 prot_type; - rc = ib_poll_cqx(ipoib_data.rcv_cqh, &ib_cqe, &num_cqes); + rc = ib_poll_cq(ipoib_data.rcv_cqh, &ib_cqe, &num_cqes); if (rc) { return rc; } @@ -897,7 +953,7 @@ static int ipoib_read_packet(__u16 * prot_p, void *data, unsigned int *size_p, new_size = ib_cqe.count - GRH_SIZE; buf = get_rcv_wqe_buf(ib_cqe.wqe, 1); - tprintf("buf=%p", buf); + tprintf("buf=%lx", buf); rc = ipoib_handle_rcv(buf, &out_buf, &new_size, is_bcast_p); if (rc) { eprintf(""); @@ -944,7 +1000,7 @@ static int ipoib_init(struct pci_device *pci) ipoib_data.ipoib_qpn = ib_get_qpn(qph); if(print_info) - printf("local ipoib qpn=0x%lx\n", ipoib_data.ipoib_qpn); + printf("local ipoib qpn=0x%x\n", ipoib_data.ipoib_qpn); ipoib_data.bcast_av = ib_data.bcast_av; ipoib_data.port_gid_raw = ib_data.port_gid.raw; diff --git a/src/drivers/net/mlx_ipoib/mt23108.c b/src/drivers/net/mlx_ipoib/mt23108.c index 37947a8c..492bc901 100644 --- a/src/drivers/net/mlx_ipoib/mt23108.c +++ b/src/drivers/net/mlx_ipoib/mt23108.c @@ -14,6 +14,10 @@ Skeleton NIC driver for Etherboot #include "etherboot.h" /* to get the interface to the body of the program */ #include "nic.h" +/* to get the PCI support functions, if this is a PCI NIC */ +#include +/* to get the ISA support functions, if this is an ISA NIC */ +#include #include "mt_version.c" #include "mt23108_imp.c" @@ -148,7 +152,7 @@ static void tavor_transmit(struct nic *nic, const char *dest, /* Destination */ /************************************************************************** DISABLE - Turn off ethernet interface ***************************************************************************/ -static void tavor_disable(struct nic *nic) +static void tavor_disable(struct dev *dev) { /* put the card in its initial state */ /* This function serves 3 purposes. @@ -160,24 +164,18 @@ static void tavor_disable(struct nic *nic) * This allows etherboot to reinitialize the interface * if something is something goes wrong. */ - if (nic || 1) { // ???? 
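As an aside on the ipoib.c hunk above: the 20-byte value that handle_ipv4_packet() checks against the DHCP client identifier option is built from one zero byte, the 24-bit IPoIB QPN and the 16-byte port GID. The standalone sketch below only illustrates that assembly; the local qpn2buf() stand-in (and its most-significant-byte-first ordering) is an assumption, since the driver's real helper is defined elsewhere in ipoib.c, and the QPN and GID values are placeholders.

#include <stdio.h>
#include <string.h>

/* Stand-in for the driver's qpn2buf(); byte order assumed MSB-first */
static void qpn2buf ( unsigned long qpn, unsigned char *buf ) {
	buf[0] = ( qpn >> 16 ) & 0xff;
	buf[1] = ( qpn >> 8 ) & 0xff;
	buf[2] = qpn & 0xff;
}

int main ( void ) {
	unsigned char gid[16] = { 0xfe, 0x80 };	/* placeholder port GID */
	unsigned long qpn = 0x550002;		/* placeholder IPoIB QPN */
	unsigned char my_client_id[20];
	unsigned int i;

	/* Same layout as get_my_client_id(): flag byte + QPN + GID */
	my_client_id[0] = 0;
	qpn2buf ( qpn, my_client_id + 1 );
	memcpy ( my_client_id + 4, gid, 16 );

	for ( i = 0 ; i < sizeof ( my_client_id ) ; i++ )
		printf ( "%02x", my_client_id[i] );
	printf ( "\n" );
	return 0;
}

This is the value memcmp()'d against the option data returned by get_client_id(), so a DHCP response whose client identifier was issued to a different QPN or port GID is silently dropped.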
+ if (dev || 1) { // ???? disable_imp(); } } -static struct nic_operations tavor_operations = { - .connect = dummy_connect, - .poll = tavor_poll, - .transmit = tavor_transmit, - .irq = tavor_irq, -}; - /************************************************************************** PROBE - Look for an adapter, this routine's visible to the outside ***************************************************************************/ -static int tavor_probe(struct nic *nic, struct pci_device *pci) +static int tavor_probe(struct dev *dev, struct pci_device *pci) { + struct nic *nic = (struct nic *)dev; int rc; unsigned char user_request; @@ -221,7 +219,10 @@ static int tavor_probe(struct nic *nic, struct pci_device *pci) nic->ioaddr = pci->ioaddr & ~3; nic->irqno = pci->irq; /* point to NIC specific routines */ - nic->nic_op = &tavor_operations; + dev->disable = tavor_disable; + nic->poll = tavor_poll; + nic->transmit = tavor_transmit; + nic->irq = tavor_irq; return 1; } @@ -229,12 +230,16 @@ static int tavor_probe(struct nic *nic, struct pci_device *pci) return 0; } -static struct pci_device_id tavor_nics[] = { +static struct pci_id tavor_nics[] = { PCI_ROM(0x15b3, 0x5a44, "MT23108", "MT23108 HCA driver"), PCI_ROM(0x15b3, 0x6278, "MT25208", "MT25208 HCA driver"), }; -PCI_DRIVER ( tavor_driver, tavor_nics, PCI_NO_CLASS ); - -DRIVER ( "MT23108/MT25208", nic_driver, pci_driver, tavor_driver, - tavor_probe, tavor_disable ); +struct pci_driver tavor_driver __pci_driver = { + .type = NIC_DRIVER, + .name = "MT23108/MT25208", + .probe = tavor_probe, + .ids = tavor_nics, + .id_count = sizeof(tavor_nics) / sizeof(tavor_nics[0]), + .class = 0, +}; diff --git a/src/drivers/net/mlx_ipoib/mt23108_imp.c b/src/drivers/net/mlx_ipoib/mt23108_imp.c index d2bdf46b..4e601668 100644 --- a/src/drivers/net/mlx_ipoib/mt23108_imp.c +++ b/src/drivers/net/mlx_ipoib/mt23108_imp.c @@ -91,12 +91,10 @@ static int transmit_imp(const char *dest, /* Destination */ rc = ipoib_send_packet(dest, type, packet, size); if (rc) { printf("*** ERROR IN SEND FLOW ***\n"); -#if 0 printf("restarting Etherboot\n"); sleep(1); longjmp(restart_etherboot, -1); /* we should not be here ... */ -#endif return -1; } @@ -108,7 +106,7 @@ static void hd(void *where, int n) int i; while (n > 0) { - printf("%p ", where); + printf("%X ", where); for (i = 0; i < ((n > 16) ? 16 : n); i++) printf(" %hhX", ((char *)where)[i]); printf("\n"); @@ -224,11 +222,9 @@ static int poll_imp(struct nic *nic, int retrieve, unsigned int *size_p) fatal_handling: printf("restarting Etherboot\n"); -#if 0 sleep(1); longjmp(restart_etherboot, -1); /* we should not be here ... */ -#endif return -1; } diff --git a/src/drivers/net/mlx_ipoib/mt25218.c b/src/drivers/net/mlx_ipoib/mt25218.c index 338a7db1..a603cdeb 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.c +++ b/src/drivers/net/mlx_ipoib/mt25218.c @@ -10,2151 +10,236 @@ Skeleton NIC driver for Etherboot * your option) any later version. 
*/ -#include -#include -#include -#include -#include -#include -#include - /* to get some global routines like printf */ #include "etherboot.h" /* to get the interface to the body of the program */ #include "nic.h" +/* to get the PCI support functions, if this is a PCI NIC */ +#include +/* to get the ISA support functions, if this is an ISA NIC */ +#include -#define CREATE_OWN 1 - +#include "mt_version.c" #include "mt25218_imp.c" -#include "arbel.h" - - -/* Port to use */ -#define PXE_IB_PORT 1 - -/*************************************************************************** - * - * Queue number allocation - * - *************************************************************************** - */ - -/** - * Allocate queue number - * - * @v q_inuse Queue usage bitmask - * @v max_inuse Maximum number of in-use queues - * @ret qn_offset Free queue number offset, or negative error - */ -static int arbel_alloc_qn_offset ( arbel_bitmask_t *q_inuse, - unsigned int max_inuse ) { - unsigned int qn_offset = 0; - arbel_bitmask_t mask = 1; - - while ( qn_offset < max_inuse ) { - if ( ( mask & *q_inuse ) == 0 ) { - *q_inuse |= mask; - return qn_offset; - } - qn_offset++; - mask <<= 1; - if ( ! mask ) { - mask = 1; - q_inuse++; - } - } - return -ENFILE; -} - -/** - * Free queue number - * - * @v q_inuse Queue usage bitmask - * @v qn_offset Queue number offset - */ -static void arbel_free_qn_offset ( arbel_bitmask_t *q_inuse, int qn_offset ) { - arbel_bitmask_t mask; - - mask = ( 1 << ( qn_offset % ( 8 * sizeof ( mask ) ) ) ); - q_inuse += ( qn_offset / ( 8 * sizeof ( mask ) ) ); - *q_inuse &= ~mask; -} - -/*************************************************************************** - * - * HCA commands - * - *************************************************************************** - */ - -/** - * Wait for Arbel command completion - * - * @v arbel Arbel device - * @ret rc Return status code - */ -static int arbel_cmd_wait ( struct arbel *arbel, - struct arbelprm_hca_command_register *hcr ) { - unsigned int wait; - - for ( wait = ARBEL_HCR_MAX_WAIT_MS ; wait ; wait-- ) { - hcr->u.dwords[6] = - readl ( arbel->config + ARBEL_HCR_REG ( 6 ) ); - if ( MLX_GET ( hcr, go ) == 0 ) - return 0; - mdelay ( 1 ); - } - return -EBUSY; -} - -/** - * Issue HCA command - * - * @v arbel Arbel device - * @v command Command opcode, flags and input/output lengths - * @v op_mod Opcode modifier (0 if no modifier applicable) - * @v in Input parameters - * @v in_mod Input modifier (0 if no modifier applicable) - * @v out Output parameters - * @ret rc Return status code - */ -static int arbel_cmd ( struct arbel *arbel, unsigned long command, - unsigned int op_mod, const void *in, - unsigned int in_mod, void *out ) { - struct arbelprm_hca_command_register hcr; - unsigned int opcode = ARBEL_HCR_OPCODE ( command ); - size_t in_len = ARBEL_HCR_IN_LEN ( command ); - size_t out_len = ARBEL_HCR_OUT_LEN ( command ); - void *in_buffer; - void *out_buffer; - unsigned int status; - unsigned int i; - int rc; - - assert ( in_len <= ARBEL_MBOX_SIZE ); - assert ( out_len <= ARBEL_MBOX_SIZE ); - - DBGC2 ( arbel, "Arbel %p command %02x in %zx%s out %zx%s\n", - arbel, opcode, in_len, - ( ( command & ARBEL_HCR_IN_MBOX ) ? "(mbox)" : "" ), out_len, - ( ( command & ARBEL_HCR_OUT_MBOX ) ? 
"(mbox)" : "" ) ); - - /* Check that HCR is free */ - if ( ( rc = arbel_cmd_wait ( arbel, &hcr ) ) != 0 ) { - DBGC ( arbel, "Arbel %p command interface locked\n", arbel ); - return rc; - } - - /* Prepare HCR */ - memset ( &hcr, 0, sizeof ( hcr ) ); - in_buffer = &hcr.u.dwords[0]; - if ( in_len && ( command & ARBEL_HCR_IN_MBOX ) ) { - in_buffer = arbel->mailbox_in; - MLX_FILL_1 ( &hcr, 1, in_param_l, virt_to_bus ( in_buffer ) ); - } - memcpy ( in_buffer, in, in_len ); - MLX_FILL_1 ( &hcr, 2, input_modifier, in_mod ); - out_buffer = &hcr.u.dwords[3]; - if ( out_len && ( command & ARBEL_HCR_OUT_MBOX ) ) { - out_buffer = arbel->mailbox_out; - MLX_FILL_1 ( &hcr, 4, out_param_l, - virt_to_bus ( out_buffer ) ); - } - MLX_FILL_3 ( &hcr, 6, - opcode, opcode, - opcode_modifier, op_mod, - go, 1 ); - DBGC2_HD ( arbel, &hcr, sizeof ( hcr ) ); - if ( in_len ) { - DBGC2 ( arbel, "Input:\n" ); - DBGC2_HD ( arbel, in, ( ( in_len < 512 ) ? in_len : 512 ) ); - } - - /* Issue command */ - for ( i = 0 ; i < ( sizeof ( hcr ) / sizeof ( hcr.u.dwords[0] ) ) ; - i++ ) { - writel ( hcr.u.dwords[i], - arbel->config + ARBEL_HCR_REG ( i ) ); - barrier(); - } - - /* Wait for command completion */ - if ( ( rc = arbel_cmd_wait ( arbel, &hcr ) ) != 0 ) { - DBGC ( arbel, "Arbel %p timed out waiting for command:\n", - arbel ); - DBGC_HD ( arbel, &hcr, sizeof ( hcr ) ); - return rc; - } - - /* Check command status */ - status = MLX_GET ( &hcr, status ); - if ( status != 0 ) { - DBGC ( arbel, "Arbel %p command failed with status %02x:\n", - arbel, status ); - DBGC_HD ( arbel, &hcr, sizeof ( hcr ) ); - return -EIO; - } - - /* Read output parameters, if any */ - hcr.u.dwords[3] = readl ( arbel->config + ARBEL_HCR_REG ( 3 ) ); - hcr.u.dwords[4] = readl ( arbel->config + ARBEL_HCR_REG ( 4 ) ); - memcpy ( out, out_buffer, out_len ); - if ( out_len ) { - DBGC2 ( arbel, "Output:\n" ); - DBGC2_HD ( arbel, out, ( ( out_len < 512 ) ? 
out_len : 512 ) ); - } - - return 0; -} - -static inline int -arbel_cmd_query_dev_lim ( struct arbel *arbel, - struct arbelprm_query_dev_lim *dev_lim ) { - return arbel_cmd ( arbel, - ARBEL_HCR_OUT_CMD ( ARBEL_HCR_QUERY_DEV_LIM, - 1, sizeof ( *dev_lim ) ), - 0, NULL, 0, dev_lim ); -} - -static inline int -arbel_cmd_query_fw ( struct arbel *arbel, struct arbelprm_query_fw *fw ) { - return arbel_cmd ( arbel, - ARBEL_HCR_OUT_CMD ( ARBEL_HCR_QUERY_FW, - 1, sizeof ( *fw ) ), - 0, NULL, 0, fw ); -} - -static inline int -arbel_cmd_init_hca ( struct arbel *arbel, - const struct arbelprm_init_hca *init_hca ) { - return arbel_cmd ( arbel, - ARBEL_HCR_IN_CMD ( ARBEL_HCR_INIT_HCA, - 1, sizeof ( *init_hca ) ), - 0, init_hca, 0, NULL ); -} - -static inline int -arbel_cmd_close_hca ( struct arbel *arbel ) { - return arbel_cmd ( arbel, - ARBEL_HCR_VOID_CMD ( ARBEL_HCR_CLOSE_HCA ), - 0, NULL, 0, NULL ); -} - -static inline int -arbel_cmd_init_ib ( struct arbel *arbel, unsigned int port, - const struct arbelprm_init_ib *init_ib ) { - return arbel_cmd ( arbel, - ARBEL_HCR_IN_CMD ( ARBEL_HCR_INIT_IB, - 1, sizeof ( *init_ib ) ), - 0, init_ib, port, NULL ); -} - -static inline int -arbel_cmd_close_ib ( struct arbel *arbel, unsigned int port ) { - return arbel_cmd ( arbel, - ARBEL_HCR_VOID_CMD ( ARBEL_HCR_CLOSE_IB ), - 0, NULL, port, NULL ); -} - -static inline int -arbel_cmd_sw2hw_mpt ( struct arbel *arbel, unsigned int index, - const struct arbelprm_mpt *mpt ) { - return arbel_cmd ( arbel, - ARBEL_HCR_IN_CMD ( ARBEL_HCR_SW2HW_MPT, - 1, sizeof ( *mpt ) ), - 0, mpt, index, NULL ); -} - -static inline int -arbel_cmd_sw2hw_eq ( struct arbel *arbel, unsigned int index, - const struct arbelprm_eqc *eqc ) { - return arbel_cmd ( arbel, - ARBEL_HCR_IN_CMD ( ARBEL_HCR_SW2HW_EQ, - 1, sizeof ( *eqc ) ), - 0, eqc, index, NULL ); -} - -static inline int -arbel_cmd_hw2sw_eq ( struct arbel *arbel, unsigned int index ) { - return arbel_cmd ( arbel, - ARBEL_HCR_VOID_CMD ( ARBEL_HCR_HW2SW_EQ ), - 1, NULL, index, NULL ); -} - -static inline int -arbel_cmd_sw2hw_cq ( struct arbel *arbel, unsigned long cqn, - const struct arbelprm_completion_queue_context *cqctx ) { - return arbel_cmd ( arbel, - ARBEL_HCR_IN_CMD ( ARBEL_HCR_SW2HW_CQ, - 1, sizeof ( *cqctx ) ), - 0, cqctx, cqn, NULL ); -} - -static inline int -arbel_cmd_hw2sw_cq ( struct arbel *arbel, unsigned long cqn, - struct arbelprm_completion_queue_context *cqctx) { - return arbel_cmd ( arbel, - ARBEL_HCR_OUT_CMD ( ARBEL_HCR_HW2SW_CQ, - 1, sizeof ( *cqctx ) ), - 0, NULL, cqn, cqctx ); -} - -static inline int -arbel_cmd_rst2init_qpee ( struct arbel *arbel, unsigned long qpn, - const struct arbelprm_qp_ee_state_transitions *ctx ){ - return arbel_cmd ( arbel, - ARBEL_HCR_IN_CMD ( ARBEL_HCR_RST2INIT_QPEE, - 1, sizeof ( *ctx ) ), - 0, ctx, qpn, NULL ); -} - -static inline int -arbel_cmd_init2rtr_qpee ( struct arbel *arbel, unsigned long qpn, - const struct arbelprm_qp_ee_state_transitions *ctx ){ - return arbel_cmd ( arbel, - ARBEL_HCR_IN_CMD ( ARBEL_HCR_INIT2RTR_QPEE, - 1, sizeof ( *ctx ) ), - 0, ctx, qpn, NULL ); -} - -static inline int -arbel_cmd_rtr2rts_qpee ( struct arbel *arbel, unsigned long qpn, - const struct arbelprm_qp_ee_state_transitions *ctx ) { - return arbel_cmd ( arbel, - ARBEL_HCR_IN_CMD ( ARBEL_HCR_RTR2RTS_QPEE, - 1, sizeof ( *ctx ) ), - 0, ctx, qpn, NULL ); -} - -static inline int -arbel_cmd_2rst_qpee ( struct arbel *arbel, unsigned long qpn ) { - return arbel_cmd ( arbel, - ARBEL_HCR_VOID_CMD ( ARBEL_HCR_2RST_QPEE ), - 0x03, NULL, qpn, NULL ); -} - -static 
inline int -arbel_cmd_mad_ifc ( struct arbel *arbel, union arbelprm_mad *mad ) { - return arbel_cmd ( arbel, - ARBEL_HCR_INOUT_CMD ( ARBEL_HCR_MAD_IFC, - 1, sizeof ( *mad ), - 1, sizeof ( *mad ) ), - 0x03, mad, PXE_IB_PORT, mad ); -} - -static inline int -arbel_cmd_read_mgm ( struct arbel *arbel, unsigned int index, - struct arbelprm_mgm_entry *mgm ) { - return arbel_cmd ( arbel, - ARBEL_HCR_OUT_CMD ( ARBEL_HCR_READ_MGM, - 1, sizeof ( *mgm ) ), - 0, NULL, index, mgm ); -} - -static inline int -arbel_cmd_write_mgm ( struct arbel *arbel, unsigned int index, - const struct arbelprm_mgm_entry *mgm ) { - return arbel_cmd ( arbel, - ARBEL_HCR_IN_CMD ( ARBEL_HCR_WRITE_MGM, - 1, sizeof ( *mgm ) ), - 0, mgm, index, NULL ); -} - -static inline int -arbel_cmd_mgid_hash ( struct arbel *arbel, const struct ib_gid *gid, - struct arbelprm_mgm_hash *hash ) { - return arbel_cmd ( arbel, - ARBEL_HCR_INOUT_CMD ( ARBEL_HCR_MGID_HASH, - 1, sizeof ( *gid ), - 0, sizeof ( *hash ) ), - 0, gid, 0, hash ); -} - -static inline int -arbel_cmd_run_fw ( struct arbel *arbel ) { - return arbel_cmd ( arbel, - ARBEL_HCR_VOID_CMD ( ARBEL_HCR_RUN_FW ), - 0, NULL, 0, NULL ); -} - -static inline int -arbel_cmd_disable_lam ( struct arbel *arbel ) { - return arbel_cmd ( arbel, - ARBEL_HCR_VOID_CMD ( ARBEL_HCR_DISABLE_LAM ), - 0, NULL, 0, NULL ); -} - -static inline int -arbel_cmd_enable_lam ( struct arbel *arbel, struct arbelprm_access_lam *lam ) { - return arbel_cmd ( arbel, - ARBEL_HCR_OUT_CMD ( ARBEL_HCR_ENABLE_LAM, - 1, sizeof ( *lam ) ), - 1, NULL, 0, lam ); -} - -static inline int -arbel_cmd_unmap_icm ( struct arbel *arbel, unsigned int page_count ) { - return arbel_cmd ( arbel, - ARBEL_HCR_VOID_CMD ( ARBEL_HCR_UNMAP_ICM ), - 0, NULL, page_count, NULL ); -} - -static inline int -arbel_cmd_map_icm ( struct arbel *arbel, - const struct arbelprm_virtual_physical_mapping *map ) { - return arbel_cmd ( arbel, - ARBEL_HCR_IN_CMD ( ARBEL_HCR_MAP_ICM, - 1, sizeof ( *map ) ), - 0, map, 1, NULL ); -} - -static inline int -arbel_cmd_unmap_icm_aux ( struct arbel *arbel ) { - return arbel_cmd ( arbel, - ARBEL_HCR_VOID_CMD ( ARBEL_HCR_UNMAP_ICM_AUX ), - 0, NULL, 0, NULL ); -} - -static inline int -arbel_cmd_map_icm_aux ( struct arbel *arbel, - const struct arbelprm_virtual_physical_mapping *map ) { - return arbel_cmd ( arbel, - ARBEL_HCR_IN_CMD ( ARBEL_HCR_MAP_ICM_AUX, - 1, sizeof ( *map ) ), - 0, map, 1, NULL ); -} - -static inline int -arbel_cmd_set_icm_size ( struct arbel *arbel, - const struct arbelprm_scalar_parameter *icm_size, - struct arbelprm_scalar_parameter *icm_aux_size ) { - return arbel_cmd ( arbel, - ARBEL_HCR_INOUT_CMD ( ARBEL_HCR_SET_ICM_SIZE, - 0, sizeof ( *icm_size ), - 0, sizeof ( *icm_aux_size ) ), - 0, icm_size, 0, icm_aux_size ); -} - -static inline int -arbel_cmd_unmap_fa ( struct arbel *arbel ) { - return arbel_cmd ( arbel, - ARBEL_HCR_VOID_CMD ( ARBEL_HCR_UNMAP_FA ), - 0, NULL, 0, NULL ); -} - -static inline int -arbel_cmd_map_fa ( struct arbel *arbel, - const struct arbelprm_virtual_physical_mapping *map ) { - return arbel_cmd ( arbel, - ARBEL_HCR_IN_CMD ( ARBEL_HCR_MAP_FA, - 1, sizeof ( *map ) ), - 0, map, 1, NULL ); -} - -/*************************************************************************** - * - * Completion queue operations - * - *************************************************************************** - */ - -/** - * Create completion queue - * - * @v ibdev Infiniband device - * @v cq Completion queue - * @ret rc Return status code - */ -static int arbel_create_cq ( struct ib_device *ibdev, - 
struct ib_completion_queue *cq ) { - struct arbel *arbel = ibdev->dev_priv; - struct arbel_completion_queue *arbel_cq; - struct arbelprm_completion_queue_context cqctx; - struct arbelprm_cq_ci_db_record *ci_db_rec; - struct arbelprm_cq_arm_db_record *arm_db_rec; - int cqn_offset; - unsigned int i; - int rc; - - /* Find a free completion queue number */ - cqn_offset = arbel_alloc_qn_offset ( arbel->cq_inuse, ARBEL_MAX_CQS ); - if ( cqn_offset < 0 ) { - DBGC ( arbel, "Arbel %p out of completion queues\n", arbel ); - rc = cqn_offset; - goto err_cqn_offset; - } - cq->cqn = ( arbel->limits.reserved_cqs + cqn_offset ); - - /* Allocate control structures */ - arbel_cq = zalloc ( sizeof ( *arbel_cq ) ); - if ( ! arbel_cq ) { - rc = -ENOMEM; - goto err_arbel_cq; - } - arbel_cq->ci_doorbell_idx = arbel_cq_ci_doorbell_idx ( cqn_offset ); - arbel_cq->arm_doorbell_idx = arbel_cq_arm_doorbell_idx ( cqn_offset ); - - /* Allocate completion queue itself */ - arbel_cq->cqe_size = ( cq->num_cqes * sizeof ( arbel_cq->cqe[0] ) ); - arbel_cq->cqe = malloc_dma ( arbel_cq->cqe_size, - sizeof ( arbel_cq->cqe[0] ) ); - if ( ! arbel_cq->cqe ) { - rc = -ENOMEM; - goto err_cqe; - } - memset ( arbel_cq->cqe, 0, arbel_cq->cqe_size ); - for ( i = 0 ; i < cq->num_cqes ; i++ ) { - MLX_FILL_1 ( &arbel_cq->cqe[i].normal, 7, owner, 1 ); - } - barrier(); - - /* Initialise doorbell records */ - ci_db_rec = &arbel->db_rec[arbel_cq->ci_doorbell_idx].cq_ci; - MLX_FILL_1 ( ci_db_rec, 0, counter, 0 ); - MLX_FILL_2 ( ci_db_rec, 1, - res, ARBEL_UAR_RES_CQ_CI, - cq_number, cq->cqn ); - arm_db_rec = &arbel->db_rec[arbel_cq->arm_doorbell_idx].cq_arm; - MLX_FILL_1 ( arm_db_rec, 0, counter, 0 ); - MLX_FILL_2 ( arm_db_rec, 1, - res, ARBEL_UAR_RES_CQ_ARM, - cq_number, cq->cqn ); - - /* Hand queue over to hardware */ - memset ( &cqctx, 0, sizeof ( cqctx ) ); - MLX_FILL_1 ( &cqctx, 0, st, 0xa /* "Event fired" */ ); - MLX_FILL_1 ( &cqctx, 2, start_address_l, - virt_to_bus ( arbel_cq->cqe ) ); - MLX_FILL_2 ( &cqctx, 3, - usr_page, arbel->limits.reserved_uars, - log_cq_size, fls ( cq->num_cqes - 1 ) ); - MLX_FILL_1 ( &cqctx, 5, c_eqn, ARBEL_NO_EQ ); - MLX_FILL_1 ( &cqctx, 6, pd, ARBEL_GLOBAL_PD ); - MLX_FILL_1 ( &cqctx, 7, l_key, arbel->reserved_lkey ); - MLX_FILL_1 ( &cqctx, 12, cqn, cq->cqn ); - MLX_FILL_1 ( &cqctx, 13, - cq_ci_db_record, arbel_cq->ci_doorbell_idx ); - MLX_FILL_1 ( &cqctx, 14, - cq_state_db_record, arbel_cq->arm_doorbell_idx ); - if ( ( rc = arbel_cmd_sw2hw_cq ( arbel, cq->cqn, &cqctx ) ) != 0 ) { - DBGC ( arbel, "Arbel %p SW2HW_CQ failed: %s\n", - arbel, strerror ( rc ) ); - goto err_sw2hw_cq; - } - - DBGC ( arbel, "Arbel %p CQN %#lx ring at [%p,%p)\n", - arbel, cq->cqn, arbel_cq->cqe, - ( ( ( void * ) arbel_cq->cqe ) + arbel_cq->cqe_size ) ); - cq->dev_priv = arbel_cq; - return 0; - - err_sw2hw_cq: - MLX_FILL_1 ( ci_db_rec, 1, res, ARBEL_UAR_RES_NONE ); - MLX_FILL_1 ( arm_db_rec, 1, res, ARBEL_UAR_RES_NONE ); - free_dma ( arbel_cq->cqe, arbel_cq->cqe_size ); - err_cqe: - free ( arbel_cq ); - err_arbel_cq: - arbel_free_qn_offset ( arbel->cq_inuse, cqn_offset ); - err_cqn_offset: - return rc; -} - -/** - * Destroy completion queue - * - * @v ibdev Infiniband device - * @v cq Completion queue - */ -static void arbel_destroy_cq ( struct ib_device *ibdev, - struct ib_completion_queue *cq ) { - struct arbel *arbel = ibdev->dev_priv; - struct arbel_completion_queue *arbel_cq = cq->dev_priv; - struct arbelprm_completion_queue_context cqctx; - struct arbelprm_cq_ci_db_record *ci_db_rec; - struct arbelprm_cq_arm_db_record *arm_db_rec; 
- int cqn_offset; - int rc; - - /* Take ownership back from hardware */ - if ( ( rc = arbel_cmd_hw2sw_cq ( arbel, cq->cqn, &cqctx ) ) != 0 ) { - DBGC ( arbel, "Arbel %p FATAL HW2SW_CQ failed on CQN %#lx: " - "%s\n", arbel, cq->cqn, strerror ( rc ) ); - /* Leak memory and return; at least we avoid corruption */ - return; - } - - /* Clear doorbell records */ - ci_db_rec = &arbel->db_rec[arbel_cq->ci_doorbell_idx].cq_ci; - arm_db_rec = &arbel->db_rec[arbel_cq->arm_doorbell_idx].cq_arm; - MLX_FILL_1 ( ci_db_rec, 1, res, ARBEL_UAR_RES_NONE ); - MLX_FILL_1 ( arm_db_rec, 1, res, ARBEL_UAR_RES_NONE ); - - /* Free memory */ - free_dma ( arbel_cq->cqe, arbel_cq->cqe_size ); - free ( arbel_cq ); - - /* Mark queue number as free */ - cqn_offset = ( cq->cqn - arbel->limits.reserved_cqs ); - arbel_free_qn_offset ( arbel->cq_inuse, cqn_offset ); - - cq->dev_priv = NULL; -} - -/*************************************************************************** - * - * Queue pair operations - * - *************************************************************************** - */ - -/** - * Create send work queue - * - * @v arbel_send_wq Send work queue - * @v num_wqes Number of work queue entries - * @ret rc Return status code - */ -static int arbel_create_send_wq ( struct arbel_send_work_queue *arbel_send_wq, - unsigned int num_wqes ) { - struct arbelprm_ud_send_wqe *wqe; - struct arbelprm_ud_send_wqe *next_wqe; - unsigned int wqe_idx_mask; - unsigned int i; - - /* Allocate work queue */ - arbel_send_wq->wqe_size = ( num_wqes * - sizeof ( arbel_send_wq->wqe[0] ) ); - arbel_send_wq->wqe = malloc_dma ( arbel_send_wq->wqe_size, - sizeof ( arbel_send_wq->wqe[0] ) ); - if ( ! arbel_send_wq->wqe ) - return -ENOMEM; - memset ( arbel_send_wq->wqe, 0, arbel_send_wq->wqe_size ); - - /* Link work queue entries */ - wqe_idx_mask = ( num_wqes - 1 ); - for ( i = 0 ; i < num_wqes ; i++ ) { - wqe = &arbel_send_wq->wqe[i].ud; - next_wqe = &arbel_send_wq->wqe[ ( i + 1 ) & wqe_idx_mask ].ud; - MLX_FILL_1 ( &wqe->next, 0, nda_31_6, - ( virt_to_bus ( next_wqe ) >> 6 ) ); - } - - return 0; -} - -/** - * Create receive work queue - * - * @v arbel_recv_wq Receive work queue - * @v num_wqes Number of work queue entries - * @ret rc Return status code - */ -static int arbel_create_recv_wq ( struct arbel_recv_work_queue *arbel_recv_wq, - unsigned int num_wqes ) { - struct arbelprm_recv_wqe *wqe; - struct arbelprm_recv_wqe *next_wqe; - unsigned int wqe_idx_mask; - size_t nds; - unsigned int i; - unsigned int j; - - /* Allocate work queue */ - arbel_recv_wq->wqe_size = ( num_wqes * - sizeof ( arbel_recv_wq->wqe[0] ) ); - arbel_recv_wq->wqe = malloc_dma ( arbel_recv_wq->wqe_size, - sizeof ( arbel_recv_wq->wqe[0] ) ); - if ( ! 
arbel_recv_wq->wqe ) - return -ENOMEM; - memset ( arbel_recv_wq->wqe, 0, arbel_recv_wq->wqe_size ); - - /* Link work queue entries */ - wqe_idx_mask = ( num_wqes - 1 ); - nds = ( ( offsetof ( typeof ( *wqe ), data ) + - sizeof ( wqe->data[0] ) ) >> 4 ); - for ( i = 0 ; i < num_wqes ; i++ ) { - wqe = &arbel_recv_wq->wqe[i].recv; - next_wqe = &arbel_recv_wq->wqe[( i + 1 ) & wqe_idx_mask].recv; - MLX_FILL_1 ( &wqe->next, 0, nda_31_6, - ( virt_to_bus ( next_wqe ) >> 6 ) ); - MLX_FILL_1 ( &wqe->next, 1, nds, ( sizeof ( *wqe ) / 16 ) ); - for ( j = 0 ; ( ( ( void * ) &wqe->data[j] ) < - ( ( void * ) ( wqe + 1 ) ) ) ; j++ ) { - MLX_FILL_1 ( &wqe->data[j], 1, - l_key, ARBEL_INVALID_LKEY ); - } - } - - return 0; -} - -/** - * Create queue pair - * - * @v ibdev Infiniband device - * @v qp Queue pair - * @ret rc Return status code - */ -static int arbel_create_qp ( struct ib_device *ibdev, - struct ib_queue_pair *qp ) { - struct arbel *arbel = ibdev->dev_priv; - struct arbel_queue_pair *arbel_qp; - struct arbelprm_qp_ee_state_transitions qpctx; - struct arbelprm_qp_db_record *send_db_rec; - struct arbelprm_qp_db_record *recv_db_rec; - int qpn_offset; - int rc; - - /* Find a free queue pair number */ - qpn_offset = arbel_alloc_qn_offset ( arbel->qp_inuse, ARBEL_MAX_QPS ); - if ( qpn_offset < 0 ) { - DBGC ( arbel, "Arbel %p out of queue pairs\n", arbel ); - rc = qpn_offset; - goto err_qpn_offset; - } - qp->qpn = ( ARBEL_QPN_BASE + arbel->limits.reserved_qps + qpn_offset ); - - /* Allocate control structures */ - arbel_qp = zalloc ( sizeof ( *arbel_qp ) ); - if ( ! arbel_qp ) { - rc = -ENOMEM; - goto err_arbel_qp; - } - arbel_qp->send.doorbell_idx = arbel_send_doorbell_idx ( qpn_offset ); - arbel_qp->recv.doorbell_idx = arbel_recv_doorbell_idx ( qpn_offset ); - - /* Create send and receive work queues */ - if ( ( rc = arbel_create_send_wq ( &arbel_qp->send, - qp->send.num_wqes ) ) != 0 ) - goto err_create_send_wq; - if ( ( rc = arbel_create_recv_wq ( &arbel_qp->recv, - qp->recv.num_wqes ) ) != 0 ) - goto err_create_recv_wq; - - /* Initialise doorbell records */ - send_db_rec = &arbel->db_rec[arbel_qp->send.doorbell_idx].qp; - MLX_FILL_1 ( send_db_rec, 0, counter, 0 ); - MLX_FILL_2 ( send_db_rec, 1, - res, ARBEL_UAR_RES_SQ, - qp_number, qp->qpn ); - recv_db_rec = &arbel->db_rec[arbel_qp->recv.doorbell_idx].qp; - MLX_FILL_1 ( recv_db_rec, 0, counter, 0 ); - MLX_FILL_2 ( recv_db_rec, 1, - res, ARBEL_UAR_RES_RQ, - qp_number, qp->qpn ); - - /* Hand queue over to hardware */ - memset ( &qpctx, 0, sizeof ( qpctx ) ); - MLX_FILL_3 ( &qpctx, 2, - qpc_eec_data.de, 1, - qpc_eec_data.pm_state, 0x03 /* Always 0x03 for UD */, - qpc_eec_data.st, ARBEL_ST_UD ); - MLX_FILL_6 ( &qpctx, 4, - qpc_eec_data.mtu, ARBEL_MTU_2048, - qpc_eec_data.msg_max, 11 /* 2^11 = 2048 */, - qpc_eec_data.log_rq_size, fls ( qp->recv.num_wqes - 1 ), - qpc_eec_data.log_rq_stride, - ( fls ( sizeof ( arbel_qp->recv.wqe[0] ) - 1 ) - 4 ), - qpc_eec_data.log_sq_size, fls ( qp->send.num_wqes - 1 ), - qpc_eec_data.log_sq_stride, - ( fls ( sizeof ( arbel_qp->send.wqe[0] ) - 1 ) - 4 ) ); - MLX_FILL_1 ( &qpctx, 5, - qpc_eec_data.usr_page, arbel->limits.reserved_uars ); - MLX_FILL_1 ( &qpctx, 10, qpc_eec_data.primary_address_path.port_number, - PXE_IB_PORT ); - MLX_FILL_1 ( &qpctx, 27, qpc_eec_data.pd, ARBEL_GLOBAL_PD ); - MLX_FILL_1 ( &qpctx, 29, qpc_eec_data.wqe_lkey, arbel->reserved_lkey ); - MLX_FILL_1 ( &qpctx, 30, qpc_eec_data.ssc, 1 ); - MLX_FILL_1 ( &qpctx, 33, qpc_eec_data.cqn_snd, qp->send.cq->cqn ); - MLX_FILL_1 ( &qpctx, 34, 
qpc_eec_data.snd_wqe_base_adr_l, - ( virt_to_bus ( arbel_qp->send.wqe ) >> 6 ) ); - MLX_FILL_1 ( &qpctx, 35, qpc_eec_data.snd_db_record_index, - arbel_qp->send.doorbell_idx ); - MLX_FILL_1 ( &qpctx, 38, qpc_eec_data.rsc, 1 ); - MLX_FILL_1 ( &qpctx, 41, qpc_eec_data.cqn_rcv, qp->recv.cq->cqn ); - MLX_FILL_1 ( &qpctx, 42, qpc_eec_data.rcv_wqe_base_adr_l, - ( virt_to_bus ( arbel_qp->recv.wqe ) >> 6 ) ); - MLX_FILL_1 ( &qpctx, 43, qpc_eec_data.rcv_db_record_index, - arbel_qp->recv.doorbell_idx ); - MLX_FILL_1 ( &qpctx, 44, qpc_eec_data.q_key, qp->qkey ); - if ( ( rc = arbel_cmd_rst2init_qpee ( arbel, qp->qpn, &qpctx )) != 0 ){ - DBGC ( arbel, "Arbel %p RST2INIT_QPEE failed: %s\n", - arbel, strerror ( rc ) ); - goto err_rst2init_qpee; - } - memset ( &qpctx, 0, sizeof ( qpctx ) ); - MLX_FILL_2 ( &qpctx, 4, - qpc_eec_data.mtu, ARBEL_MTU_2048, - qpc_eec_data.msg_max, 11 /* 2^11 = 2048 */ ); - if ( ( rc = arbel_cmd_init2rtr_qpee ( arbel, qp->qpn, &qpctx )) != 0 ){ - DBGC ( arbel, "Arbel %p INIT2RTR_QPEE failed: %s\n", - arbel, strerror ( rc ) ); - goto err_init2rtr_qpee; - } - memset ( &qpctx, 0, sizeof ( qpctx ) ); - if ( ( rc = arbel_cmd_rtr2rts_qpee ( arbel, qp->qpn, &qpctx ) ) != 0 ){ - DBGC ( arbel, "Arbel %p RTR2RTS_QPEE failed: %s\n", - arbel, strerror ( rc ) ); - goto err_rtr2rts_qpee; - } - - DBGC ( arbel, "Arbel %p QPN %#lx send ring at [%p,%p)\n", - arbel, qp->qpn, arbel_qp->send.wqe, - ( ( (void *) arbel_qp->send.wqe ) + arbel_qp->send.wqe_size ) ); - DBGC ( arbel, "Arbel %p QPN %#lx receive ring at [%p,%p)\n", - arbel, qp->qpn, arbel_qp->recv.wqe, - ( ( (void *) arbel_qp->recv.wqe ) + arbel_qp->recv.wqe_size ) ); - qp->dev_priv = arbel_qp; - return 0; - - err_rtr2rts_qpee: - err_init2rtr_qpee: - arbel_cmd_2rst_qpee ( arbel, qp->qpn ); - err_rst2init_qpee: - MLX_FILL_1 ( send_db_rec, 1, res, ARBEL_UAR_RES_NONE ); - MLX_FILL_1 ( recv_db_rec, 1, res, ARBEL_UAR_RES_NONE ); - free_dma ( arbel_qp->recv.wqe, arbel_qp->recv.wqe_size ); - err_create_recv_wq: - free_dma ( arbel_qp->send.wqe, arbel_qp->send.wqe_size ); - err_create_send_wq: - free ( arbel_qp ); - err_arbel_qp: - arbel_free_qn_offset ( arbel->qp_inuse, qpn_offset ); - err_qpn_offset: - return rc; -} - -/** - * Destroy queue pair - * - * @v ibdev Infiniband device - * @v qp Queue pair - */ -static void arbel_destroy_qp ( struct ib_device *ibdev, - struct ib_queue_pair *qp ) { - struct arbel *arbel = ibdev->dev_priv; - struct arbel_queue_pair *arbel_qp = qp->dev_priv; - struct arbelprm_qp_db_record *send_db_rec; - struct arbelprm_qp_db_record *recv_db_rec; - int qpn_offset; - int rc; - - /* Take ownership back from hardware */ - if ( ( rc = arbel_cmd_2rst_qpee ( arbel, qp->qpn ) ) != 0 ) { - DBGC ( arbel, "Arbel %p FATAL 2RST_QPEE failed on QPN %#lx: " - "%s\n", arbel, qp->qpn, strerror ( rc ) ); - /* Leak memory and return; at least we avoid corruption */ - return; - } - - /* Clear doorbell records */ - send_db_rec = &arbel->db_rec[arbel_qp->send.doorbell_idx].qp; - recv_db_rec = &arbel->db_rec[arbel_qp->recv.doorbell_idx].qp; - MLX_FILL_1 ( send_db_rec, 1, res, ARBEL_UAR_RES_NONE ); - MLX_FILL_1 ( recv_db_rec, 1, res, ARBEL_UAR_RES_NONE ); - - /* Free memory */ - free_dma ( arbel_qp->send.wqe, arbel_qp->send.wqe_size ); - free_dma ( arbel_qp->recv.wqe, arbel_qp->recv.wqe_size ); - free ( arbel_qp ); - - /* Mark queue number as free */ - qpn_offset = ( qp->qpn - ARBEL_QPN_BASE - arbel->limits.reserved_qps ); - arbel_free_qn_offset ( arbel->qp_inuse, qpn_offset ); - - qp->dev_priv = NULL; -} - 
-/*************************************************************************** - * - * Work request operations - * - *************************************************************************** - */ - -/** - * Ring doorbell register in UAR - * - * @v arbel Arbel device - * @v db_reg Doorbell register structure - * @v offset Address of doorbell - */ -static void arbel_ring_doorbell ( struct arbel *arbel, - union arbelprm_doorbell_register *db_reg, - unsigned int offset ) { - - DBGC2 ( arbel, "Arbel %p ringing doorbell %08lx:%08lx at %lx\n", - arbel, db_reg->dword[0], db_reg->dword[1], - virt_to_phys ( arbel->uar + offset ) ); - - barrier(); - writel ( db_reg->dword[0], ( arbel->uar + offset + 0 ) ); - barrier(); - writel ( db_reg->dword[1], ( arbel->uar + offset + 4 ) ); -} - -/** GID used for GID-less send work queue entries */ -static const struct ib_gid arbel_no_gid = { - { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0 } } -}; - -/** - * Post send work queue entry - * - * @v ibdev Infiniband device - * @v qp Queue pair - * @v av Address vector - * @v iobuf I/O buffer - * @ret rc Return status code - */ -static int arbel_post_send ( struct ib_device *ibdev, - struct ib_queue_pair *qp, - struct ib_address_vector *av, - struct io_buffer *iobuf ) { - struct arbel *arbel = ibdev->dev_priv; - struct arbel_queue_pair *arbel_qp = qp->dev_priv; - struct ib_work_queue *wq = &qp->send; - struct arbel_send_work_queue *arbel_send_wq = &arbel_qp->send; - struct arbelprm_ud_send_wqe *prev_wqe; - struct arbelprm_ud_send_wqe *wqe; - struct arbelprm_qp_db_record *qp_db_rec; - union arbelprm_doorbell_register db_reg; - const struct ib_gid *gid; - unsigned int wqe_idx_mask; - size_t nds; - - /* Allocate work queue entry */ - wqe_idx_mask = ( wq->num_wqes - 1 ); - if ( wq->iobufs[wq->next_idx & wqe_idx_mask] ) { - DBGC ( arbel, "Arbel %p send queue full", arbel ); - return -ENOBUFS; - } - wq->iobufs[wq->next_idx & wqe_idx_mask] = iobuf; - prev_wqe = &arbel_send_wq->wqe[(wq->next_idx - 1) & wqe_idx_mask].ud; - wqe = &arbel_send_wq->wqe[wq->next_idx & wqe_idx_mask].ud; - - /* Construct work queue entry */ - MLX_FILL_1 ( &wqe->next, 1, always1, 1 ); - memset ( &wqe->ctrl, 0, sizeof ( wqe->ctrl ) ); - MLX_FILL_1 ( &wqe->ctrl, 0, always1, 1 ); - memset ( &wqe->ud, 0, sizeof ( wqe->ud ) ); - MLX_FILL_2 ( &wqe->ud, 0, - ud_address_vector.pd, ARBEL_GLOBAL_PD, - ud_address_vector.port_number, PXE_IB_PORT ); - MLX_FILL_2 ( &wqe->ud, 1, - ud_address_vector.rlid, av->dlid, - ud_address_vector.g, av->gid_present ); - MLX_FILL_2 ( &wqe->ud, 2, - ud_address_vector.max_stat_rate, - ( ( av->rate >= 3 ) ? 0 : 1 ), - ud_address_vector.msg, 3 ); - MLX_FILL_1 ( &wqe->ud, 3, ud_address_vector.sl, av->sl ); - gid = ( av->gid_present ? 
&av->gid : &arbel_no_gid ); - memcpy ( &wqe->ud.u.dwords[4], gid, sizeof ( *gid ) ); - MLX_FILL_1 ( &wqe->ud, 8, destination_qp, av->dest_qp ); - MLX_FILL_1 ( &wqe->ud, 9, q_key, av->qkey ); - MLX_FILL_1 ( &wqe->data[0], 0, byte_count, iob_len ( iobuf ) ); - MLX_FILL_1 ( &wqe->data[0], 1, l_key, arbel->reserved_lkey ); - MLX_FILL_1 ( &wqe->data[0], 3, - local_address_l, virt_to_bus ( iobuf->data ) ); - - /* Update previous work queue entry's "next" field */ - nds = ( ( offsetof ( typeof ( *wqe ), data ) + - sizeof ( wqe->data[0] ) ) >> 4 ); - MLX_SET ( &prev_wqe->next, nopcode, ARBEL_OPCODE_SEND ); - MLX_FILL_3 ( &prev_wqe->next, 1, - nds, nds, - f, 1, - always1, 1 ); - - /* Update doorbell record */ - barrier(); - qp_db_rec = &arbel->db_rec[arbel_send_wq->doorbell_idx].qp; - MLX_FILL_1 ( qp_db_rec, 0, - counter, ( ( wq->next_idx + 1 ) & 0xffff ) ); - - /* Ring doorbell register */ - MLX_FILL_4 ( &db_reg.send, 0, - nopcode, ARBEL_OPCODE_SEND, - f, 1, - wqe_counter, ( wq->next_idx & 0xffff ), - wqe_cnt, 1 ); - MLX_FILL_2 ( &db_reg.send, 1, - nds, nds, - qpn, qp->qpn ); - arbel_ring_doorbell ( arbel, &db_reg, POST_SND_OFFSET ); - - /* Update work queue's index */ - wq->next_idx++; - - return 0; -} - -/** - * Post receive work queue entry - * - * @v ibdev Infiniband device - * @v qp Queue pair - * @v iobuf I/O buffer - * @ret rc Return status code - */ -static int arbel_post_recv ( struct ib_device *ibdev, - struct ib_queue_pair *qp, - struct io_buffer *iobuf ) { - struct arbel *arbel = ibdev->dev_priv; - struct arbel_queue_pair *arbel_qp = qp->dev_priv; - struct ib_work_queue *wq = &qp->recv; - struct arbel_recv_work_queue *arbel_recv_wq = &arbel_qp->recv; - struct arbelprm_recv_wqe *wqe; - union arbelprm_doorbell_record *db_rec; - unsigned int wqe_idx_mask; - - /* Allocate work queue entry */ - wqe_idx_mask = ( wq->num_wqes - 1 ); - if ( wq->iobufs[wq->next_idx & wqe_idx_mask] ) { - DBGC ( arbel, "Arbel %p receive queue full", arbel ); - return -ENOBUFS; - } - wq->iobufs[wq->next_idx & wqe_idx_mask] = iobuf; - wqe = &arbel_recv_wq->wqe[wq->next_idx & wqe_idx_mask].recv; - - /* Construct work queue entry */ - MLX_FILL_1 ( &wqe->data[0], 0, byte_count, iob_tailroom ( iobuf ) ); - MLX_FILL_1 ( &wqe->data[0], 1, l_key, arbel->reserved_lkey ); - MLX_FILL_1 ( &wqe->data[0], 3, - local_address_l, virt_to_bus ( iobuf->data ) ); - - /* Update doorbell record */ - barrier(); - db_rec = &arbel->db_rec[arbel_recv_wq->doorbell_idx]; - MLX_FILL_1 ( &db_rec->qp, 0, - counter, ( ( wq->next_idx + 1 ) & 0xffff ) ); - - /* Update work queue's index */ - wq->next_idx++; - - return 0; -} - -/** - * Handle completion - * - * @v ibdev Infiniband device - * @v cq Completion queue - * @v cqe Hardware completion queue entry - * @v complete_send Send completion handler - * @v complete_recv Receive completion handler - * @ret rc Return status code - */ -static int arbel_complete ( struct ib_device *ibdev, - struct ib_completion_queue *cq, - union arbelprm_completion_entry *cqe, - ib_completer_t complete_send, - ib_completer_t complete_recv ) { - struct arbel *arbel = ibdev->dev_priv; - struct ib_completion completion; - struct ib_work_queue *wq; - struct ib_queue_pair *qp; - struct arbel_queue_pair *arbel_qp; - struct arbel_send_work_queue *arbel_send_wq; - struct arbel_recv_work_queue *arbel_recv_wq; - struct arbelprm_recv_wqe *recv_wqe; - struct io_buffer *iobuf; - ib_completer_t complete; - unsigned int opcode; - unsigned long qpn; - int is_send; - unsigned long wqe_adr; - unsigned int wqe_idx; - int rc = 0; - - /* 
Parse completion */ - memset ( &completion, 0, sizeof ( completion ) ); - qpn = MLX_GET ( &cqe->normal, my_qpn ); - is_send = MLX_GET ( &cqe->normal, s ); - wqe_adr = ( MLX_GET ( &cqe->normal, wqe_adr ) << 6 ); - opcode = MLX_GET ( &cqe->normal, opcode ); - if ( opcode >= ARBEL_OPCODE_RECV_ERROR ) { - /* "s" field is not valid for error opcodes */ - is_send = ( opcode == ARBEL_OPCODE_SEND_ERROR ); - completion.syndrome = MLX_GET ( &cqe->error, syndrome ); - DBGC ( arbel, "Arbel %p CPN %lx syndrome %x vendor %lx\n", - arbel, cq->cqn, completion.syndrome, - MLX_GET ( &cqe->error, vendor_code ) ); - rc = -EIO; - /* Don't return immediately; propagate error to completer */ - } - - /* Identify work queue */ - wq = ib_find_wq ( cq, qpn, is_send ); - if ( ! wq ) { - DBGC ( arbel, "Arbel %p CQN %lx unknown %s QPN %lx\n", - arbel, cq->cqn, ( is_send ? "send" : "recv" ), qpn ); - return -EIO; - } - qp = wq->qp; - arbel_qp = qp->dev_priv; - arbel_send_wq = &arbel_qp->send; - arbel_recv_wq = &arbel_qp->recv; - - /* Identify work queue entry index */ - if ( is_send ) { - wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_send_wq->wqe ) ) / - sizeof ( arbel_send_wq->wqe[0] ) ); - assert ( wqe_idx < qp->send.num_wqes ); - } else { - wqe_idx = ( ( wqe_adr - virt_to_bus ( arbel_recv_wq->wqe ) ) / - sizeof ( arbel_recv_wq->wqe[0] ) ); - assert ( wqe_idx < qp->recv.num_wqes ); - } - - /* Identify I/O buffer */ - iobuf = wq->iobufs[wqe_idx]; - if ( ! iobuf ) { - DBGC ( arbel, "Arbel %p CQN %lx QPN %lx empty WQE %x\n", - arbel, cq->cqn, qpn, wqe_idx ); - return -EIO; - } - wq->iobufs[wqe_idx] = NULL; - - /* Fill in length for received packets */ - if ( ! is_send ) { - completion.len = MLX_GET ( &cqe->normal, byte_cnt ); - recv_wqe = &arbel_recv_wq->wqe[wqe_idx].recv; - assert ( MLX_GET ( &recv_wqe->data[0], local_address_l ) == - virt_to_bus ( iobuf->data ) ); - assert ( MLX_GET ( &recv_wqe->data[0], byte_count ) == - iob_tailroom ( iobuf ) ); - MLX_FILL_1 ( &recv_wqe->data[0], 0, byte_count, 0 ); - MLX_FILL_1 ( &recv_wqe->data[0], 1, - l_key, ARBEL_INVALID_LKEY ); - if ( completion.len > iob_tailroom ( iobuf ) ) { - DBGC ( arbel, "Arbel %p CQN %lx QPN %lx IDX %x " - "overlength received packet length %zd\n", - arbel, cq->cqn, qpn, wqe_idx, completion.len ); - return -EIO; +/* NIC specific static variables go here */ + +int prompt_key(int secs, unsigned char *ch_p) +{ + unsigned long tmo; + unsigned char ch; + + for (tmo = currticks() + secs * TICKS_PER_SEC; currticks() < tmo;) { + if (iskey()) { + ch = getchar(); + /* toupper does not work ... */ + if (ch == 'v') + ch = 'V'; + if (ch == 'i') + ch = 'I'; + if ((ch=='V') || (ch=='I')) { + *ch_p = ch; + return 1; + } } } - /* Pass off to caller's completion handler */ - complete = ( is_send ? 
complete_send : complete_recv ); - complete ( ibdev, qp, &completion, iobuf ); - - return rc; -} - -/** - * Drain event queue - * - * @v arbel Arbel device - */ -static void arbel_drain_eq ( struct arbel *arbel ) { -#warning "drain the event queue" - drain_eq(); -} - -/** - * Poll completion queue - * - * @v ibdev Infiniband device - * @v cq Completion queue - * @v complete_send Send completion handler - * @v complete_recv Receive completion handler - */ -static void arbel_poll_cq ( struct ib_device *ibdev, - struct ib_completion_queue *cq, - ib_completer_t complete_send, - ib_completer_t complete_recv ) { - struct arbel *arbel = ibdev->dev_priv; - struct arbel_completion_queue *arbel_cq = cq->dev_priv; - struct arbelprm_cq_ci_db_record *ci_db_rec; - union arbelprm_completion_entry *cqe; - unsigned int cqe_idx_mask; - int rc; - - /* Drain the event queue */ - arbel_drain_eq ( arbel ); - - while ( 1 ) { - /* Look for completion entry */ - cqe_idx_mask = ( cq->num_cqes - 1 ); - cqe = &arbel_cq->cqe[cq->next_idx & cqe_idx_mask]; - if ( MLX_GET ( &cqe->normal, owner ) != 0 ) { - /* Entry still owned by hardware; end of poll */ - break; - } - - /* Handle completion */ - if ( ( rc = arbel_complete ( ibdev, cq, cqe, complete_send, - complete_recv ) ) != 0 ) { - DBGC ( arbel, "Arbel %p failed to complete: %s\n", - arbel, strerror ( rc ) ); - DBGC_HD ( arbel, cqe, sizeof ( *cqe ) ); - } - - /* Return ownership to hardware */ - MLX_FILL_1 ( &cqe->normal, 7, owner, 1 ); - barrier(); - /* Update completion queue's index */ - cq->next_idx++; - /* Update doorbell record */ - ci_db_rec = &arbel->db_rec[arbel_cq->ci_doorbell_idx].cq_ci; - MLX_FILL_1 ( ci_db_rec, 0, - counter, ( cq->next_idx & 0xffffffffUL ) ); - } -} - -/*************************************************************************** - * - * Multicast group operations - * - *************************************************************************** - */ - -/** - * Attach to multicast group - * - * @v ibdev Infiniband device - * @v qp Queue pair - * @v gid Multicast GID - * @ret rc Return status code - */ -static int arbel_mcast_attach ( struct ib_device *ibdev, - struct ib_queue_pair *qp, - struct ib_gid *gid ) { - struct arbel *arbel = ibdev->dev_priv; - struct arbelprm_mgm_hash hash; - struct arbelprm_mgm_entry mgm; - unsigned int index; - int rc; - - /* Generate hash table index */ - if ( ( rc = arbel_cmd_mgid_hash ( arbel, gid, &hash ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not hash GID: %s\n", - arbel, strerror ( rc ) ); - return rc; - } - index = MLX_GET ( &hash, hash ); - - /* Check for existing hash table entry */ - if ( ( rc = arbel_cmd_read_mgm ( arbel, index, &mgm ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not read MGM %#x: %s\n", - arbel, index, strerror ( rc ) ); - return rc; - } - if ( MLX_GET ( &mgm, mgmqp_0.qi ) != 0 ) { - /* FIXME: this implementation allows only a single QP - * per multicast group, and doesn't handle hash - * collisions. Sufficient for IPoIB but may need to - * be extended in future. 
- */ - DBGC ( arbel, "Arbel %p MGID index %#x already in use\n", - arbel, index ); - return -EBUSY; - } - - /* Update hash table entry */ - MLX_FILL_2 ( &mgm, 8, - mgmqp_0.qpn_i, qp->qpn, - mgmqp_0.qi, 1 ); - memcpy ( &mgm.u.dwords[4], gid, sizeof ( *gid ) ); - if ( ( rc = arbel_cmd_write_mgm ( arbel, index, &mgm ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not write MGM %#x: %s\n", - arbel, index, strerror ( rc ) ); - return rc; - } - return 0; } -/** - * Detach from multicast group - * - * @v ibdev Infiniband device - * @v qp Queue pair - * @v gid Multicast GID - */ -static void arbel_mcast_detach ( struct ib_device *ibdev, - struct ib_queue_pair *qp __unused, - struct ib_gid *gid ) { - struct arbel *arbel = ibdev->dev_priv; - struct arbelprm_mgm_hash hash; - struct arbelprm_mgm_entry mgm; - unsigned int index; - int rc; - - /* Generate hash table index */ - if ( ( rc = arbel_cmd_mgid_hash ( arbel, gid, &hash ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not hash GID: %s\n", - arbel, strerror ( rc ) ); - return; - } - index = MLX_GET ( &hash, hash ); - - /* Clear hash table entry */ - memset ( &mgm, 0, sizeof ( mgm ) ); - if ( ( rc = arbel_cmd_write_mgm ( arbel, index, &mgm ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not write MGM %#x: %s\n", - arbel, index, strerror ( rc ) ); - return; - } -} - -/** Arbel Infiniband operations */ -static struct ib_device_operations arbel_ib_operations = { - .create_cq = arbel_create_cq, - .destroy_cq = arbel_destroy_cq, - .create_qp = arbel_create_qp, - .destroy_qp = arbel_destroy_qp, - .post_send = arbel_post_send, - .post_recv = arbel_post_recv, - .poll_cq = arbel_poll_cq, - .mcast_attach = arbel_mcast_attach, - .mcast_detach = arbel_mcast_detach, -}; - -/*************************************************************************** - * - * MAD IFC operations - * - *************************************************************************** - */ - -static int arbel_mad_ifc ( struct arbel *arbel, - union arbelprm_mad *mad ) { - struct ib_mad_hdr *hdr = &mad->mad.mad_hdr; - int rc; - - hdr->base_version = IB_MGMT_BASE_VERSION; - if ( ( rc = arbel_cmd_mad_ifc ( arbel, mad ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not issue MAD IFC: %s\n", - arbel, strerror ( rc ) ); - return rc; - } - if ( hdr->status != 0 ) { - DBGC ( arbel, "Arbel %p MAD IFC status %04x\n", - arbel, ntohs ( hdr->status ) ); - return -EIO; - } - return 0; -} - -static int arbel_get_port_info ( struct arbel *arbel, - struct ib_mad_port_info *port_info ) { - union arbelprm_mad mad; - struct ib_mad_hdr *hdr = &mad.mad.mad_hdr; - int rc; - - memset ( &mad, 0, sizeof ( mad ) ); - hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; - hdr->class_version = 1; - hdr->method = IB_MGMT_METHOD_GET; - hdr->attr_id = htons ( IB_SMP_ATTR_PORT_INFO ); - hdr->attr_mod = htonl ( PXE_IB_PORT ); - if ( ( rc = arbel_mad_ifc ( arbel, &mad ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not get port info: %s\n", - arbel, strerror ( rc ) ); - return rc; - } - memcpy ( port_info, &mad.mad.port_info, sizeof ( *port_info ) ); - return 0; -} - -static int arbel_get_guid_info ( struct arbel *arbel, - struct ib_mad_guid_info *guid_info ) { - union arbelprm_mad mad; - struct ib_mad_hdr *hdr = &mad.mad.mad_hdr; - int rc; - - memset ( &mad, 0, sizeof ( mad ) ); - hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; - hdr->class_version = 1; - hdr->method = IB_MGMT_METHOD_GET; - hdr->attr_id = htons ( IB_SMP_ATTR_GUID_INFO ); - if ( ( rc = arbel_mad_ifc ( arbel, &mad ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not get GUID info: 
%s\n", - arbel, strerror ( rc ) ); - return rc; - } - memcpy ( guid_info, &mad.mad.guid_info, sizeof ( *guid_info ) ); - return 0; -} - -static int arbel_get_pkey_table ( struct arbel *arbel, - struct ib_mad_pkey_table *pkey_table ) { - union arbelprm_mad mad; - struct ib_mad_hdr *hdr = &mad.mad.mad_hdr; - int rc; - - memset ( &mad, 0, sizeof ( mad ) ); - hdr->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; - hdr->class_version = 1; - hdr->method = IB_MGMT_METHOD_GET; - hdr->attr_id = htons ( IB_SMP_ATTR_PKEY_TABLE ); - if ( ( rc = arbel_mad_ifc ( arbel, &mad ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not get pkey table: %s\n", - arbel, strerror ( rc ) ); - return rc; - } - memcpy ( pkey_table, &mad.mad.pkey_table, sizeof ( *pkey_table ) ); - return 0; -} - -static int arbel_get_port_gid ( struct arbel *arbel, - struct ib_gid *port_gid ) { - union { - /* This union exists just to save stack space */ - struct ib_mad_port_info port_info; - struct ib_mad_guid_info guid_info; - } u; - int rc; - - /* Port info gives us the first half of the port GID */ - if ( ( rc = arbel_get_port_info ( arbel, &u.port_info ) ) != 0 ) - return rc; - memcpy ( &port_gid->u.bytes[0], u.port_info.gid_prefix, 8 ); - - /* GUID info gives us the second half of the port GID */ - if ( ( rc = arbel_get_guid_info ( arbel, &u.guid_info ) ) != 0 ) - return rc; - memcpy ( &port_gid->u.bytes[8], u.guid_info.gid_local, 8 ); - - return 0; -} - -static int arbel_get_sm_lid ( struct arbel *arbel, - unsigned long *sm_lid ) { - struct ib_mad_port_info port_info; - int rc; - - if ( ( rc = arbel_get_port_info ( arbel, &port_info ) ) != 0 ) - return rc; - *sm_lid = ntohs ( port_info.mastersm_lid ); - return 0; -} - -static int arbel_get_pkey ( struct arbel *arbel, unsigned int *pkey ) { - struct ib_mad_pkey_table pkey_table; - int rc; - - if ( ( rc = arbel_get_pkey_table ( arbel, &pkey_table ) ) != 0 ) - return rc; - *pkey = ntohs ( pkey_table.pkey[0][0] ); - return 0; -} - -/** - * Wait for link up - * - * @v arbel Arbel device - * @ret rc Return status code - * - * This function shouldn't really exist. Unfortunately, IB links take - * a long time to come up, and we can't get various key parameters - * e.g. our own IPoIB MAC address without information from the subnet - * manager). We should eventually make link-up an asynchronous event. - */ -static int arbel_wait_for_link ( struct arbel *arbel ) { - struct ib_mad_port_info port_info; - unsigned int retries; - int rc; - - printf ( "Waiting for Infiniband link-up..." ); - for ( retries = 20 ; retries ; retries-- ) { - if ( ( rc = arbel_get_port_info ( arbel, &port_info ) ) != 0 ) - continue; - if ( ( ( port_info.port_state__link_speed_supported ) & 0xf ) - == 4 ) { - printf ( "ok\n" ); - return 0; - } - printf ( "." 
); - sleep ( 1 ); - } - printf ( "failed\n" ); - return -ENODEV; -}; - -/** - * Get MAD parameters - * - * @v arbel Arbel device - * @ret rc Return status code - */ -static int arbel_get_mad_params ( struct ib_device *ibdev ) { - struct arbel *arbel = ibdev->dev_priv; - int rc; - - /* Get subnet manager LID */ - if ( ( rc = arbel_get_sm_lid ( arbel, &ibdev->sm_lid ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not determine subnet manager " - "LID: %s\n", arbel, strerror ( rc ) ); - return rc; - } - - /* Get port GID */ - if ( ( rc = arbel_get_port_gid ( arbel, &ibdev->port_gid ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not determine port GID: %s\n", - arbel, strerror ( rc ) ); - return rc; - } - - /* Get partition key */ - if ( ( rc = arbel_get_pkey ( arbel, &ibdev->pkey ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not determine partition key: " - "%s\n", arbel, strerror ( rc ) ); - return rc; - } - - return 0; -} - -/*************************************************************************** - * - * Firmware control - * - *************************************************************************** - */ - -/** - * Start firmware running - * - * @v arbel Arbel device - * @ret rc Return status code - */ -static int arbel_start_firmware ( struct arbel *arbel ) { - struct arbelprm_query_fw fw; - struct arbelprm_access_lam lam; - struct arbelprm_virtual_physical_mapping map_fa; - unsigned int fw_pages; - unsigned int log2_fw_pages; - size_t fw_size; - physaddr_t fw_base; - int rc; - - /* Get firmware parameters */ - if ( ( rc = arbel_cmd_query_fw ( arbel, &fw ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not query firmware: %s\n", - arbel, strerror ( rc ) ); - goto err_query_fw; - } - DBGC ( arbel, "Arbel %p firmware version %ld.%ld.%ld\n", arbel, - MLX_GET ( &fw, fw_rev_major ), MLX_GET ( &fw, fw_rev_minor ), - MLX_GET ( &fw, fw_rev_subminor ) ); - fw_pages = MLX_GET ( &fw, fw_pages ); - log2_fw_pages = fls ( fw_pages - 1 ); - fw_pages = ( 1 << log2_fw_pages ); - DBGC ( arbel, "Arbel %p requires %d kB for firmware\n", - arbel, ( fw_pages * 4 ) ); - - /* Enable locally-attached memory. Ignore failure; there may - * be no attached memory. +/************************************************************************** +IRQ - handle interrupts +***************************************************************************/ +static void mt25218_irq(struct nic *nic, irq_action_t action) +{ + /* This routine is somewhat optional. Etherboot itself + * doesn't use interrupts, but they are required under some + * circumstances when we're acting as a PXE stack. + * + * If you don't implement this routine, the only effect will + * be that your driver cannot be used via Etherboot's UNDI + * API. This won't affect programs that use only the UDP + * portion of the PXE API, such as pxelinux. */ - arbel_cmd_enable_lam ( arbel, &lam ); - /* Allocate firmware pages and map firmware area */ - fw_size = ( fw_pages * 4096 ); - arbel->firmware_area = umalloc ( fw_size ); - if ( ! 
arbel->firmware_area ) { - rc = -ENOMEM; - goto err_alloc_fa; + if (0) { + nic = NULL; } - fw_base = ( user_to_phys ( arbel->firmware_area, fw_size ) & - ~( fw_size - 1 ) ); - DBGC ( arbel, "Arbel %p firmware area at physical [%lx,%lx)\n", - arbel, fw_base, ( fw_base + fw_size ) ); - memset ( &map_fa, 0, sizeof ( map_fa ) ); - MLX_FILL_2 ( &map_fa, 3, - log2size, log2_fw_pages, - pa_l, ( fw_base >> 12 ) ); - if ( ( rc = arbel_cmd_map_fa ( arbel, &map_fa ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not map firmware: %s\n", - arbel, strerror ( rc ) ); - goto err_map_fa; + switch (action) { + case DISABLE: + case ENABLE: + /* Set receive interrupt enabled/disabled state */ + /* + outb ( action == ENABLE ? IntrMaskEnabled : IntrMaskDisabled, + nic->ioaddr + IntrMaskRegister ); + */ + break; + case FORCE: + /* Force NIC to generate a receive interrupt */ + /* + outb ( ForceInterrupt, nic->ioaddr + IntrForceRegister ); + */ + break; } - - /* Start firmware */ - if ( ( rc = arbel_cmd_run_fw ( arbel ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not run firmware: %s\n", - arbel, strerror ( rc ) ); - goto err_run_fw; - } - - DBGC ( arbel, "Arbel %p firmware started\n", arbel ); - return 0; - - err_run_fw: - arbel_cmd_unmap_fa ( arbel ); - err_map_fa: - ufree ( arbel->firmware_area ); - arbel->firmware_area = UNULL; - err_alloc_fa: - err_query_fw: - return rc; } -/** - * Stop firmware running - * - * @v arbel Arbel device - */ -static void arbel_stop_firmware ( struct arbel *arbel ) { - int rc; +/************************************************************************** +POLL - Wait for a frame +***************************************************************************/ +static int mt25218_poll(struct nic *nic, int retrieve) +{ + /* Work out whether or not there's an ethernet packet ready to + * read. Return 0 if not. + */ + /* + if ( ! ) return 0; + */ - if ( ( rc = arbel_cmd_unmap_fa ( arbel ) ) != 0 ) { - DBGC ( arbel, "Arbel %p FATAL could not stop firmware: %s\n", - arbel, strerror ( rc ) ); - /* Leak memory and return; at least we avoid corruption */ - return; + /* retrieve==0 indicates that we are just checking for the + * presence of a packet but don't want to read it just yet. + */ + /* + if ( ! retrieve ) return 1; + */ + + /* Copy data to nic->packet. Data should include the + * link-layer header (dest MAC, source MAC, type). + * Store length of data in nic->packetlen. + * Return true to indicate a packet has been read. 
+ */
+ /*
+ nic->packetlen = ;
+ memcpy ( nic->packet, , );
+ return 1;
+ */
+ unsigned int size;
+ int rc;
+ rc = poll_imp(nic, retrieve, &size);
+ if (rc) {
+ return 0;
 }
- ufree ( arbel->firmware_area );
- arbel->firmware_area = UNULL;
+
+ if (size == 0) {
+ return 0;
+ }
+
+ nic->packetlen = size;
+
+ return 1;
 }
 
-/***************************************************************************
- *
- * Infinihost Context Memory management
- *
- ***************************************************************************
- */
-
-/**
- * Get device limits
- *
- * @v arbel Arbel device
- * @ret rc Return status code
- */
-static int arbel_get_limits ( struct arbel *arbel ) {
- struct arbelprm_query_dev_lim dev_lim;
+/**************************************************************************
TRANSMIT - Transmit a frame
***************************************************************************/
+static void mt25218_transmit(struct nic *nic, const char *dest, /* Destination */
+ unsigned int type, /* Type */
+ unsigned int size, /* size */
+ const char *packet)
+{ /* Packet */
 int rc;
 
- if ( ( rc = arbel_cmd_query_dev_lim ( arbel, &dev_lim ) ) != 0 ) {
- DBGC ( arbel, "Arbel %p could not get device limits: %s\n",
- arbel, strerror ( rc ) );
- return rc;
+ /* Transmit packet to dest MAC address. You will need to
+ * construct the link-layer header (dest MAC, source MAC,
+ * type).
+ */
+ if (nic) {
+ rc = transmit_imp(dest, type, packet, size);
+ if (rc)
+ eprintf("transmit error");
+ }
+}
+
+/**************************************************************************
DISABLE - Turn off ethernet interface
***************************************************************************/
+static void mt25218_disable(struct dev *dev)
+{
+ /* put the card in its initial state */
+ /* This function serves 3 purposes.
+ * This disables DMA and interrupts so we don't receive
+ * unexpected packets or interrupts from the card after
+ * etherboot has finished.
+ * This frees resources so etherboot may use
+ * this driver on another interface
+ * This allows etherboot to reinitialize the interface
+ * if something goes wrong.
+ */
+ if (dev || 1) { // ???? 
+ disable_imp();
+ }
+}
+
+/**************************************************************************
PROBE - Look for an adapter, this routine's visible to the outside
***************************************************************************/
+
+static int mt25218_probe(struct dev *dev, struct pci_device *pci)
+{
+ struct nic *nic = (struct nic *)dev;
+ int rc;
+ unsigned char user_request;
+
+ if (pci->vendor != MELLANOX_VENDOR_ID) {
+ eprintf("");
+ return 0;
 }
- arbel->limits.reserved_qps =
- ( 1 << MLX_GET ( &dev_lim, log2_rsvd_qps ) );
- arbel->limits.qpc_entry_size = MLX_GET ( &dev_lim, qpc_entry_sz );
- arbel->limits.eqpc_entry_size = MLX_GET ( &dev_lim, eqpc_entry_sz );
- arbel->limits.reserved_srqs =
- ( 1 << MLX_GET ( &dev_lim, log2_rsvd_srqs ) );
- arbel->limits.srqc_entry_size = MLX_GET ( &dev_lim, srq_entry_sz );
- arbel->limits.reserved_ees =
- ( 1 << MLX_GET ( &dev_lim, log2_rsvd_ees ) );
- arbel->limits.eec_entry_size = MLX_GET ( &dev_lim, eec_entry_sz );
- arbel->limits.eeec_entry_size = MLX_GET ( &dev_lim, eeec_entry_sz );
- arbel->limits.reserved_cqs =
- ( 1 << MLX_GET ( &dev_lim, log2_rsvd_cqs ) );
- arbel->limits.cqc_entry_size = MLX_GET ( &dev_lim, cqc_entry_sz );
- arbel->limits.reserved_mtts =
- ( 1 << MLX_GET ( &dev_lim, log2_rsvd_mtts ) );
- arbel->limits.mtt_entry_size = MLX_GET ( &dev_lim, mtt_entry_sz );
- arbel->limits.reserved_mrws =
- ( 1 << MLX_GET ( &dev_lim, log2_rsvd_mrws ) );
- arbel->limits.mpt_entry_size = MLX_GET ( &dev_lim, mpt_entry_sz );
- arbel->limits.reserved_rdbs =
- ( 1 << MLX_GET ( &dev_lim, log2_rsvd_rdbs ) );
- arbel->limits.eqc_entry_size = MLX_GET ( &dev_lim, eqc_entry_sz );
- arbel->limits.reserved_uars = MLX_GET ( &dev_lim, num_rsvd_uars );
+ printf("\n");
+ printf("Mellanox Technologies LTD - Boot over IB implementation\n");
+ printf("Build version = %s\n\n", build_revision);
+ verbose_messages = 0;
+ print_info = 0;
+ printf("Press within 3 seconds:\n");
+ printf("V - to increase verbosity\n");
+ printf("I - to print information\n");
+ if (prompt_key(3, &user_request)) {
+ if (user_request == 'V') {
+ printf("User selected verbose messages\n");
+ verbose_messages = 1;
+ }
+ else if (user_request == 'I') {
+ printf("User selected to print information\n");
+ print_info = 1;
+ }
+ }
+ printf("\n");
+
+ adjust_pci_device(pci);
+
+ nic->priv_data = NULL;
+ rc = probe_imp(pci, nic);
+
+ /* give the user a chance to look at the info */
+ if (print_info)
+ sleep(5);
+
+ if (!rc) {
+ /* store NIC parameters */
+ nic->ioaddr = pci->ioaddr & ~3;
+ nic->irqno = pci->irq;
+ /* point to NIC specific routines */
+ dev->disable = mt25218_disable;
+ nic->poll = mt25218_poll;
+ nic->transmit = mt25218_transmit;
+ nic->irq = mt25218_irq;
+
+ return 1;
+ }
+ /* else */
 return 0;
 }
 
-/**
- * Get ICM usage
- *
- * @v log_num_entries Log2 of the number of entries
- * @v entry_size Entry size
- * @ret usage Usage size in ICM
- */
-static size_t icm_usage ( unsigned int log_num_entries, size_t entry_size ) {
- size_t usage;
-
- usage = ( ( 1 << log_num_entries ) * entry_size );
- usage = ( ( usage + 4095 ) & ~4095 );
- return usage;
-}
-
-/**
- * Allocate ICM
- *
- * @v arbel Arbel device
- * @v init_hca INIT_HCA structure to fill in
- * @ret rc Return status code
- */
-static int arbel_alloc_icm ( struct arbel *arbel,
- struct arbelprm_init_hca *init_hca ) {
- struct arbelprm_scalar_parameter icm_size;
- struct arbelprm_scalar_parameter icm_aux_size;
- struct arbelprm_virtual_physical_mapping map_icm_aux;
- struct arbelprm_virtual_physical_mapping 
map_icm; - size_t icm_offset = 0; - unsigned int log_num_qps, log_num_srqs, log_num_ees, log_num_cqs; - unsigned int log_num_mtts, log_num_mpts, log_num_rdbs, log_num_eqs; - int rc; - - icm_offset = ( ( arbel->limits.reserved_uars + 1 ) << 12 ); - - /* Queue pair contexts */ - log_num_qps = fls ( arbel->limits.reserved_qps + ARBEL_MAX_QPS - 1 ); - MLX_FILL_2 ( init_hca, 13, - qpc_eec_cqc_eqc_rdb_parameters.qpc_base_addr_l, - ( icm_offset >> 7 ), - qpc_eec_cqc_eqc_rdb_parameters.log_num_of_qp, - log_num_qps ); - DBGC ( arbel, "Arbel %p ICM QPC base = %zx\n", arbel, icm_offset ); - icm_offset += icm_usage ( log_num_qps, arbel->limits.qpc_entry_size ); - - /* Extended queue pair contexts */ - MLX_FILL_1 ( init_hca, 25, - qpc_eec_cqc_eqc_rdb_parameters.eqpc_base_addr_l, - icm_offset ); - DBGC ( arbel, "Arbel %p ICM EQPC base = %zx\n", arbel, icm_offset ); - // icm_offset += icm_usage ( log_num_qps, arbel->limits.eqpc_entry_size ); - icm_offset += icm_usage ( log_num_qps, arbel->limits.qpc_entry_size ); - - /* Shared receive queue contexts */ - log_num_srqs = fls ( arbel->limits.reserved_srqs - 1 ); - MLX_FILL_2 ( init_hca, 19, - qpc_eec_cqc_eqc_rdb_parameters.srqc_base_addr_l, - ( icm_offset >> 5 ), - qpc_eec_cqc_eqc_rdb_parameters.log_num_of_srq, - log_num_srqs ); - DBGC ( arbel, "Arbel %p ICM SRQC base = %zx\n", arbel, icm_offset ); - icm_offset += icm_usage ( log_num_srqs, arbel->limits.srqc_entry_size ); - - /* End-to-end contexts */ - log_num_ees = fls ( arbel->limits.reserved_ees - 1 ); - MLX_FILL_2 ( init_hca, 17, - qpc_eec_cqc_eqc_rdb_parameters.eec_base_addr_l, - ( icm_offset >> 7 ), - qpc_eec_cqc_eqc_rdb_parameters.log_num_of_ee, - log_num_ees ); - DBGC ( arbel, "Arbel %p ICM EEC base = %zx\n", arbel, icm_offset ); - icm_offset += icm_usage ( log_num_ees, arbel->limits.eec_entry_size ); - - /* Extended end-to-end contexts */ - MLX_FILL_1 ( init_hca, 29, - qpc_eec_cqc_eqc_rdb_parameters.eeec_base_addr_l, - icm_offset ); - DBGC ( arbel, "Arbel %p ICM EEEC base = %zx\n", arbel, icm_offset ); - icm_offset += icm_usage ( log_num_ees, arbel->limits.eeec_entry_size ); - - /* Completion queue contexts */ - log_num_cqs = fls ( arbel->limits.reserved_cqs + ARBEL_MAX_CQS - 1 ); - MLX_FILL_2 ( init_hca, 21, - qpc_eec_cqc_eqc_rdb_parameters.cqc_base_addr_l, - ( icm_offset >> 6 ), - qpc_eec_cqc_eqc_rdb_parameters.log_num_of_cq, - log_num_cqs ); - DBGC ( arbel, "Arbel %p ICM CQC base = %zx\n", arbel, icm_offset ); - icm_offset += icm_usage ( log_num_cqs, arbel->limits.cqc_entry_size ); - - /* Memory translation table */ - log_num_mtts = fls ( arbel->limits.reserved_mtts - 1 ); - MLX_FILL_1 ( init_hca, 65, - tpt_parameters.mtt_base_addr_l, icm_offset ); - DBGC ( arbel, "Arbel %p ICM MTT base = %zx\n", arbel, icm_offset ); - icm_offset += icm_usage ( log_num_mtts, arbel->limits.mtt_entry_size ); - - /* Memory protection table */ - log_num_mpts = fls ( arbel->limits.reserved_mrws + 1 - 1 ); - MLX_FILL_1 ( init_hca, 61, - tpt_parameters.mpt_base_adr_l, icm_offset ); - MLX_FILL_1 ( init_hca, 62, - tpt_parameters.log_mpt_sz, log_num_mpts ); - DBGC ( arbel, "Arbel %p ICM MTT base = %zx\n", arbel, icm_offset ); - icm_offset += icm_usage ( log_num_mpts, arbel->limits.mpt_entry_size ); - - /* RDMA something or other */ - log_num_rdbs = fls ( arbel->limits.reserved_rdbs - 1 ); - MLX_FILL_1 ( init_hca, 37, - qpc_eec_cqc_eqc_rdb_parameters.rdb_base_addr_l, - icm_offset ); - DBGC ( arbel, "Arbel %p ICM RDB base = %zx\n", arbel, icm_offset ); - icm_offset += icm_usage ( log_num_rdbs, 32 ); - - /* Event queue 
contexts */ - log_num_eqs = 6; - MLX_FILL_2 ( init_hca, 33, - qpc_eec_cqc_eqc_rdb_parameters.eqc_base_addr_l, - ( icm_offset >> 6 ), - qpc_eec_cqc_eqc_rdb_parameters.log_num_eq, - log_num_eqs ); - DBGC ( arbel, "Arbel %p ICM EQ base = %zx\n", arbel, icm_offset ); - icm_offset += ( ( 1 << log_num_eqs ) * arbel->limits.eqc_entry_size ); - - /* Multicast table */ - MLX_FILL_1 ( init_hca, 49, - multicast_parameters.mc_base_addr_l, icm_offset ); - MLX_FILL_1 ( init_hca, 52, - multicast_parameters.log_mc_table_entry_sz, - fls ( sizeof ( struct arbelprm_mgm_entry ) - 1 ) ); - MLX_FILL_1 ( init_hca, 53, - multicast_parameters.mc_table_hash_sz, 8 ); - MLX_FILL_1 ( init_hca, 54, - multicast_parameters.log_mc_table_sz, 3 ); - DBGC ( arbel, "Arbel %p ICM MC base = %zx\n", arbel, icm_offset ); - icm_offset += ( 8 * sizeof ( struct arbelprm_mgm_entry ) ); - - arbel->icm_len = icm_offset; - arbel->icm_len = ( ( arbel->icm_len + 4095 ) & ~4095 ); - - /* Get ICM auxiliary area size */ - memset ( &icm_size, 0, sizeof ( icm_size ) ); - MLX_FILL_1 ( &icm_size, 1, value, arbel->icm_len ); - if ( ( rc = arbel_cmd_set_icm_size ( arbel, &icm_size, - &icm_aux_size ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not set ICM size: %s\n", - arbel, strerror ( rc ) ); - goto err_set_icm_size; - } - arbel->icm_aux_len = ( MLX_GET ( &icm_aux_size, value ) * 4096 ); - - /* Allocate ICM data and auxiliary area */ - DBGC ( arbel, "Arbel %p requires %zd kB ICM and %zd kB AUX ICM\n", - arbel, ( arbel->icm_len / 1024 ), - ( arbel->icm_aux_len / 1024 ) ); - arbel->icm = umalloc ( arbel->icm_len + arbel->icm_aux_len ); - if ( ! arbel->icm ) { - rc = -ENOMEM; - goto err_alloc; - } - - /* Map ICM auxiliary area */ - memset ( &map_icm_aux, 0, sizeof ( map_icm_aux ) ); - MLX_FILL_2 ( &map_icm_aux, 3, - log2size, fls ( ( arbel->icm_aux_len / 4096 ) - 1 ), - pa_l, - ( user_to_phys ( arbel->icm, arbel->icm_len ) >> 12 ) ); - if ( ( rc = arbel_cmd_map_icm_aux ( arbel, &map_icm_aux ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not map AUX ICM: %s\n", - arbel, strerror ( rc ) ); - goto err_map_icm_aux; - } - - /* MAP ICM area */ - memset ( &map_icm, 0, sizeof ( map_icm ) ); - MLX_FILL_2 ( &map_icm, 3, - log2size, fls ( ( arbel->icm_len / 4096 ) - 1 ), - pa_l, ( user_to_phys ( arbel->icm, 0 ) >> 12 ) ); - if ( ( rc = arbel_cmd_map_icm ( arbel, &map_icm ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not map ICM: %s\n", - arbel, strerror ( rc ) ); - goto err_map_icm; - } - - return 0; - - arbel_cmd_unmap_icm ( arbel, ( arbel->icm_len / 4096 ) ); - err_map_icm: - arbel_cmd_unmap_icm_aux ( arbel ); - err_map_icm_aux: - ufree ( arbel->icm ); - arbel->icm = UNULL; - err_alloc: - err_set_icm_size: - return rc; -} - -/** - * Free ICM - * - * @v arbel Arbel device - */ -static void arbel_free_icm ( struct arbel *arbel ) { - arbel_cmd_unmap_icm ( arbel, ( arbel->icm_len / 4096 ) ); - arbel_cmd_unmap_icm_aux ( arbel ); - ufree ( arbel->icm ); - arbel->icm = UNULL; -} - -/*************************************************************************** - * - * Infiniband link-layer operations - * - *************************************************************************** - */ - -/** - * Initialise Infiniband link - * - * @v arbel Arbel device - * @ret rc Return status code - */ -static int arbel_init_ib ( struct arbel *arbel ) { - struct arbelprm_init_ib init_ib; - int rc; - - memset ( &init_ib, 0, sizeof ( init_ib ) ); - MLX_FILL_3 ( &init_ib, 0, - mtu_cap, ARBEL_MTU_2048, - port_width_cap, 3, - vl_cap, 1 ); - MLX_FILL_1 ( &init_ib, 1, max_gid, 1 ); - 
MLX_FILL_1 ( &init_ib, 2, max_pkey, 64 ); - if ( ( rc = arbel_cmd_init_ib ( arbel, PXE_IB_PORT, - &init_ib ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not intialise IB: %s\n", - arbel, strerror ( rc ) ); - return rc; - } - - return 0; -} - -/** - * Close Infiniband link - * - * @v arbel Arbel device - */ -static void arbel_close_ib ( struct arbel *arbel ) { - int rc; - - if ( ( rc = arbel_cmd_close_ib ( arbel, PXE_IB_PORT ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not close IB: %s\n", - arbel, strerror ( rc ) ); - /* Nothing we can do about this */ - } -} - -/*************************************************************************** - * - * PCI interface - * - *************************************************************************** - */ - -/** - * Set up memory protection table - * - * @v arbel Arbel device - * @ret rc Return status code - */ -static int arbel_setup_mpt ( struct arbel *arbel ) { - struct arbelprm_mpt mpt; - uint32_t key; - int rc; - - /* Derive key */ - key = ( arbel->limits.reserved_mrws | ARBEL_MKEY_PREFIX ); - arbel->reserved_lkey = ( ( key << 8 ) | ( key >> 24 ) ); - - /* Initialise memory protection table */ - memset ( &mpt, 0, sizeof ( mpt ) ); - MLX_FILL_4 ( &mpt, 0, - r_w, 1, - pa, 1, - lr, 1, - lw, 1 ); - MLX_FILL_1 ( &mpt, 2, mem_key, key ); - MLX_FILL_1 ( &mpt, 3, pd, ARBEL_GLOBAL_PD ); - MLX_FILL_1 ( &mpt, 6, reg_wnd_len_h, 0xffffffffUL ); - MLX_FILL_1 ( &mpt, 7, reg_wnd_len_l, 0xffffffffUL ); - if ( ( rc = arbel_cmd_sw2hw_mpt ( arbel, arbel->limits.reserved_mrws, - &mpt ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not set up MPT: %s\n", - arbel, strerror ( rc ) ); - return rc; - } - - return 0; -} - -/** - * Probe PCI device - * - * @v pci PCI device - * @v id PCI ID - * @ret rc Return status code - */ -static int arbel_probe ( struct pci_device *pci, - const struct pci_device_id *id __unused ) { - struct ib_device *ibdev; - struct arbel *arbel; - struct arbelprm_init_hca init_hca; - int rc; - - /* Allocate Infiniband device */ - ibdev = alloc_ibdev ( sizeof ( *arbel ) ); - if ( ! ibdev ) { - rc = -ENOMEM; - goto err_ibdev; - } - ibdev->op = &arbel_ib_operations; - pci_set_drvdata ( pci, ibdev ); - ibdev->dev = &pci->dev; - arbel = ibdev->dev_priv; - memset ( arbel, 0, sizeof ( *arbel ) ); - - /* Fix up PCI device */ - adjust_pci_device ( pci ); - - /* Get PCI BARs */ - arbel->config = ioremap ( pci_bar_start ( pci, ARBEL_PCI_CONFIG_BAR ), - ARBEL_PCI_CONFIG_BAR_SIZE ); - arbel->uar = ioremap ( ( pci_bar_start ( pci, ARBEL_PCI_UAR_BAR ) + - ARBEL_PCI_UAR_IDX * ARBEL_PCI_UAR_SIZE ), - ARBEL_PCI_UAR_SIZE ); - - /* Allocate space for mailboxes */ - arbel->mailbox_in = malloc_dma ( ARBEL_MBOX_SIZE, ARBEL_MBOX_ALIGN ); - if ( ! arbel->mailbox_in ) { - rc = -ENOMEM; - goto err_mailbox_in; - } - arbel->mailbox_out = malloc_dma ( ARBEL_MBOX_SIZE, ARBEL_MBOX_ALIGN ); - if ( ! 
arbel->mailbox_out ) { - rc = -ENOMEM; - goto err_mailbox_out; - } - -#define SELF_INIT 1 - -#if SELF_INIT - /* Start firmware */ - if ( ( rc = arbel_start_firmware ( arbel ) ) != 0 ) - goto err_start_firmware; -#else - /* Initialise hardware */ - udqp_t qph; - if ( ( rc = ib_driver_init ( pci, &qph ) ) != 0 ) - goto err_ib_driver_init; -#endif - - /* Get device limits */ - if ( ( rc = arbel_get_limits ( arbel ) ) != 0 ) - goto err_get_limits; - -#if SELF_INIT - /* Allocate ICM */ - memset ( &init_hca, 0, sizeof ( init_hca ) ); - if ( ( rc = arbel_alloc_icm ( arbel, &init_hca ) ) != 0 ) - goto err_alloc_icm; - - - unsigned long uar_offset = ( arbel->limits.reserved_uars * 4096 ); - arbel->db_rec = phys_to_virt ( user_to_phys ( arbel->icm, - uar_offset ) ); - memset ( arbel->db_rec, 0, 4096 ); - union arbelprm_doorbell_record *db_rec; - db_rec = &arbel->db_rec[ARBEL_GROUP_SEPARATOR_DOORBELL]; - MLX_FILL_1 ( &db_rec->qp, 1, res, ARBEL_UAR_RES_GROUP_SEP ); - - - /* Initialise HCA */ - MLX_FILL_1 ( &init_hca, 74, uar_parameters.log_max_uars, 1 ); - if ( ( rc = arbel_cmd_init_hca ( arbel, &init_hca ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not initialise HCA: %s\n", - arbel, strerror ( rc ) ); - goto err_init_hca; - } - - /* Set up memory protection */ - if ( ( rc = arbel_setup_mpt ( arbel ) ) != 0 ) - goto err_setup_mpt; - - /* Bring up IB layer */ - if ( ( rc = arbel_init_ib ( arbel ) ) != 0 ) - goto err_init_ib; - - /* Wait for link */ - if ( ( rc = arbel_wait_for_link ( arbel ) ) != 0 ) - goto err_wait_for_link; - -#endif - - - - /* Hack up IB structures */ -#if 0 - arbel->config = memfree_pci_dev.cr_space; - arbel->uar = memfree_pci_dev.uar; - arbel->mailbox_in = dev_buffers_p->inprm_buf; - arbel->mailbox_out = dev_buffers_p->outprm_buf; -#endif -#if SELF_INIT -#else - arbel->reserved_lkey = dev_ib_data.mkey; - arbel->db_rec = dev_ib_data.uar_context_base; -#endif - // arbel->eqn = dev_ib_data.eq.eqn; - - - /* Get MAD parameters */ - if ( ( rc = arbel_get_mad_params ( ibdev ) ) != 0 ) - goto err_get_mad_params; - - DBGC ( arbel, "Arbel %p port GID is %08lx:%08lx:%08lx:%08lx\n", arbel, - htonl ( ibdev->port_gid.u.dwords[0] ), - htonl ( ibdev->port_gid.u.dwords[1] ), - htonl ( ibdev->port_gid.u.dwords[2] ), - htonl ( ibdev->port_gid.u.dwords[3] ) ); - - /* Add IPoIB device */ - if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) { - DBGC ( arbel, "Arbel %p could not add IPoIB device: %s\n", - arbel, strerror ( rc ) ); - goto err_ipoib_probe; - } - - return 0; - - err_ipoib_probe: - err_get_mad_params: - ib_driver_close ( 0 ); - err_ib_driver_init: - - err_wait_for_link: - arbel_close_ib ( arbel ); - err_init_ib: - err_setup_mpt: - arbel_cmd_close_hca ( arbel ); - err_init_hca: - arbel_free_icm ( arbel ); - err_alloc_icm: - err_get_limits: - arbel_stop_firmware ( arbel ); - err_start_firmware: - free_dma ( arbel->mailbox_out, ARBEL_MBOX_SIZE ); - err_mailbox_out: - free_dma ( arbel->mailbox_in, ARBEL_MBOX_SIZE ); - err_mailbox_in: - free_ibdev ( ibdev ); - err_ibdev: - return rc; -} - -/** - * Remove PCI device - * - * @v pci PCI device - */ -static void arbel_remove ( struct pci_device *pci ) { - struct ib_device *ibdev = pci_get_drvdata ( pci ); - struct arbel *arbel = ibdev->dev_priv; - - -#warning "check error sequence for probe()" - ipoib_remove ( ibdev ); - ib_driver_close ( 0 ); - arbel_stop_firmware ( arbel ); - free_dma ( arbel->mailbox_out, ARBEL_MBOX_SIZE ); - free_dma ( arbel->mailbox_in, ARBEL_MBOX_SIZE ); - free_ibdev ( ibdev ); -} - -static struct pci_device_id arbel_nics[] = { - 
PCI_ROM ( 0x15b3, 0x6282, "MT25218", "MT25218 HCA driver" ), - PCI_ROM ( 0x15b3, 0x6274, "MT25204", "MT25204 HCA driver" ), +static struct pci_id mt25218_nics[] = { + PCI_ROM(0x15b3, 0x6282, "MT25218", "MT25218 HCA driver"), + PCI_ROM(0x15b3, 0x6274, "MT25204", "MT25204 HCA driver"), }; -struct pci_driver arbel_driver __pci_driver = { - .ids = arbel_nics, - .id_count = ( sizeof ( arbel_nics ) / sizeof ( arbel_nics[0] ) ), - .probe = arbel_probe, - .remove = arbel_remove, +struct pci_driver mt25218_driver __pci_driver = { + .type = NIC_DRIVER, + .name = "MT25218", + .probe = mt25218_probe, + .ids = mt25218_nics, + .id_count = sizeof(mt25218_nics) / sizeof(mt25218_nics[0]), + .class = 0, }; diff --git a/src/drivers/net/mlx_ipoib/mt25218.h b/src/drivers/net/mlx_ipoib/mt25218.h index 702b5caf..15a3feaf 100644 --- a/src/drivers/net/mlx_ipoib/mt25218.h +++ b/src/drivers/net/mlx_ipoib/mt25218.h @@ -26,9 +26,7 @@ #define MEMFREE_CMD_RUN_FW 0xff6 #define MEMFREE_CMD_SET_ICM_SIZE 0xffd #define MEMFREE_CMD_MAP_ICM_AUX 0xffc -#define MEMFREE_CMD_UNMAP_ICM_AUX 0xffb #define MEMFREE_CMD_MAP_ICM 0xffa -#define MEMFREE_CMD_UNMAP_ICM 0xff9 #define MEMFREE_CMD_QUERY_DEV_LIM 0x003 /* @@ -146,24 +144,24 @@ /* uar context indexes */ enum { - MADS_SND_CQ_ARM_DB_IDX = MADS_SND_CQN_SN, - MADS_RCV_CQ_ARM_DB_IDX = MADS_RCV_CQN_SN, - IPOIB_SND_CQ_ARM_DB_IDX = IPOIB_SND_CQN_SN, - IPOIB_RCV_CQ_ARM_DB_IDX = IPOIB_RCV_CQN_SN, - MADS_SND_QP_DB_IDX = ( MAX_APP_CQS + MADS_QPN_SN ), - IPOIB_SND_QP_DB_IDX = ( MAX_APP_CQS + IPOIB_QPN_SN ), - GROUP_SEP_IDX = ( MAX_APP_CQS + MAX_APP_QPS ), - // START_UNMAPPED_DB_IDX, + MADS_RCV_CQ_ARM_DB_IDX, + MADS_SND_CQ_ARM_DB_IDX, + IPOIB_RCV_CQ_ARM_DB_IDX, + IPOIB_SND_CQ_ARM_DB_IDX, + MADS_SND_QP_DB_IDX, + IPOIB_SND_QP_DB_IDX, + GROUP_SEP_IDX, + START_UNMAPPED_DB_IDX, /* -------------------------- unmapped doorbell records -------------------------- */ - // END_UNMAPPED_DB_IDX, - IPOIB_RCV_QP_DB_IDX = ( 512 - MAX_APP_CQS - IPOIB_QPN_SN - 1 ), - MADS_RCV_QP_DB_IDX = ( 512 - MAX_APP_CQS - MADS_QPN_SN - 1 ), - IPOIB_RCV_CQ_CI_DB_IDX = ( 512 - IPOIB_RCV_CQN_SN - 1 ), - IPOIB_SND_CQ_CI_DB_IDX = ( 512 - IPOIB_SND_CQN_SN - 1 ), - MADS_RCV_CQ_CI_DB_IDX = ( 512 - MADS_RCV_CQN_SN - 1 ), - MADS_SND_CQ_CI_DB_IDX = ( 512 - MADS_SND_CQN_SN - 1 ), + END_UNMAPPED_DB_IDX = 505, + MADS_RCV_QP_DB_IDX = 506, + IPOIB_RCV_QP_DB_IDX = 507, + MADS_RCV_CQ_CI_DB_IDX = 508, + MADS_SND_CQ_CI_DB_IDX = 509, + IPOIB_RCV_CQ_CI_DB_IDX = 510, + IPOIB_SND_CQ_CI_DB_IDX = 511 }; /* uar resources types */ @@ -342,24 +340,6 @@ struct cq_dbell_st { __u8 raw[MT_STRUCT_SIZE(arbelprm_cq_cmd_doorbell_st)]; } __attribute__ ((packed)); -struct qp_db_record_st { - __u8 raw[MT_STRUCT_SIZE(arbelprm_qp_db_record_st)]; -} __attribute__ ((packed)); - -struct cq_arm_db_record_st { - __u8 raw[MT_STRUCT_SIZE(arbelprm_cq_arm_db_record_st)]; -} __attribute__ ((packed)); - -struct cq_ci_db_record_st { - __u8 raw[MT_STRUCT_SIZE(arbelprm_cq_ci_db_record_st)]; -} __attribute__ ((packed)); - -union db_record_st { - struct qp_db_record_st qp; - struct cq_arm_db_record_st cq_arm; - struct cq_ci_db_record_st cq_ci; -} __attribute__ ((packed)); - struct mad_ifc_inprm_st { union mad_u mad; } __attribute__ ((packed)); diff --git a/src/drivers/net/mlx_ipoib/mt25218_imp.c b/src/drivers/net/mlx_ipoib/mt25218_imp.c index efa37948..fe407041 100644 --- a/src/drivers/net/mlx_ipoib/mt25218_imp.c +++ b/src/drivers/net/mlx_ipoib/mt25218_imp.c @@ -45,7 +45,6 @@ static void be_to_cpu_buf(void *buf, int size) #include "ib_driver.c" #include "ipoib.c" -#if 0 static int 
probe_imp(struct pci_device *pci, struct nic *nic) { int rc; @@ -92,12 +91,10 @@ static int transmit_imp(const char *dest, /* Destination */ rc = ipoib_send_packet(dest, type, packet, size); if (rc) { printf("*** ERROR IN SEND FLOW ***\n"); -#if 0 printf("restarting Etherboot\n"); sleep(1); longjmp(restart_etherboot, -1); /* we should not be here ... */ -#endif return -1; } @@ -109,7 +106,7 @@ static void hd(void *where, int n) int i; while (n > 0) { - printf("%p ", where); + printf("%X ", where); for (i = 0; i < ((n > 16) ? 16 : n); i++) printf(" %hhX", ((char *)where)[i]); printf("\n"); @@ -224,13 +221,10 @@ static int poll_imp(struct nic *nic, int retrieve, unsigned int *size_p) return 0; fatal_handling: -#if 0 printf("restarting Etherboot\n"); sleep(1); longjmp(restart_etherboot, -1); /* we should not be here ... */ -#endif return -1; } -#endif diff --git a/src/drivers/net/mlx_ipoib/mt_version.c b/src/drivers/net/mlx_ipoib/mt_version.c index bae860c3..2dbd67a6 100644 --- a/src/drivers/net/mlx_ipoib/mt_version.c +++ b/src/drivers/net/mlx_ipoib/mt_version.c @@ -20,4 +20,4 @@ */ /* definition of the build version goes here */ -const char *build_revision= "191"; +const char *build_revision= "113"; diff --git a/src/drivers/net/mlx_ipoib/patches/dhcpd.patch b/src/drivers/net/mlx_ipoib/patches/dhcpd.patch index 3f6269bf..e2d0a202 100644 --- a/src/drivers/net/mlx_ipoib/patches/dhcpd.patch +++ b/src/drivers/net/mlx_ipoib/patches/dhcpd.patch @@ -1,7 +1,17 @@ -Index: dhcp-3.0.4b3/includes/site.h -=================================================================== ---- dhcp-3.0.4b3.orig/includes/site.h 2002-03-12 20:33:39.000000000 +0200 -+++ dhcp-3.0.4b3/includes/site.h 2006-03-15 12:50:00.000000000 +0200 +diff -ru ../../orig/dhcp-3.0.4b2/common/options.c ./common/options.c +--- ../../orig/dhcp-3.0.4b2/common/options.c 2005-11-02 01:19:03.000000000 +0200 ++++ ./common/options.c 2005-12-06 14:38:17.000000000 +0200 +@@ -537,6 +537,7 @@ + priority_list [priority_len++] = DHO_DHCP_LEASE_TIME; + priority_list [priority_len++] = DHO_DHCP_MESSAGE; + priority_list [priority_len++] = DHO_DHCP_REQUESTED_ADDRESS; ++ priority_list [priority_len++] = DHO_DHCP_CLIENT_IDENTIFIER; + priority_list [priority_len++] = DHO_FQDN; + + if (prl && prl -> len > 0) { +diff -ru ../../orig/dhcp-3.0.4b2/includes/site.h ./includes/site.h +--- ../../orig/dhcp-3.0.4b2/includes/site.h 2002-03-12 20:33:39.000000000 +0200 ++++ ./includes/site.h 2005-12-06 14:36:55.000000000 +0200 @@ -135,7 +135,7 @@ the aforementioned problems do not matter to you, or if no other API is supported for your system, you may want to go with it. */ From 0979e674bb6a9156bb49a4b867f848fbf93a4c57 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Fri, 21 Sep 2007 18:27:51 +0100 Subject: [PATCH 81/84] Added more verbose memory-map debugging. Added redundant call to get_memmap() in int13_boot() immediately before jumping to boot sector, to assist in debugging. 
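(For illustration only, not part of the patch: a minimal C sketch of the debugging pattern described above, dumping the memory map before handing control to the boot sector. It reuses the get_memmap()/struct memory_map interface visible in the diff below; the header path and the exact DBG() format are assumptions.)

	#include <stdint.h>
	#include <memmap.h>	/* assumed location of struct memory_map / get_memmap() */

	static void dump_memmap_before_boot ( void ) {
		struct memory_map memmap;
		unsigned int i;

		/* Fetch the system memory map (INT 15,e820 with
		 * e801/88 fallbacks inside get_memmap()).
		 */
		get_memmap ( &memmap );

		/* Dump each region; many boot failures turn out to be
		 * memory-map related, so this is cheap insurance.
		 */
		for ( i = 0 ; i < memmap.count ; i++ ) {
			DBG ( "region %d: [%llx,%llx)\n", i,
			      memmap.regions[i].start, memmap.regions[i].end );
		}
	}
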
--- src/arch/i386/firmware/pcbios/memmap.c | 45 +++++++++++++++++++------- src/arch/i386/interface/pcbios/int13.c | 9 ++++++ 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/src/arch/i386/firmware/pcbios/memmap.c b/src/arch/i386/firmware/pcbios/memmap.c index 318ba091..b6a8ca3c 100644 --- a/src/arch/i386/firmware/pcbios/memmap.c +++ b/src/arch/i386/firmware/pcbios/memmap.c @@ -74,16 +74,19 @@ static unsigned int extmemsize_e801 ( void ) { "=d" ( confmem_16m_plus_64k ) : "a" ( 0xe801 ) ); - if ( flags & CF ) + if ( flags & CF ) { + DBG ( "INT 15,e801 failed with CF set\n" ); return 0; + } if ( ! ( extmem_1m_to_16m_k | extmem_16m_plus_64k ) ) { + DBG ( "INT 15,e801 extmem=0, using confmem\n" ); extmem_1m_to_16m_k = confmem_1m_to_16m_k; extmem_16m_plus_64k = confmem_16m_plus_64k; } extmem = ( extmem_1m_to_16m_k + ( extmem_16m_plus_64k * 64 ) ); - DBG ( "Extended memory size %d+64*%d=%d kB\n", + DBG ( "INT 15,e801 extended memory size %d+64*%d=%d kB\n", extmem_1m_to_16m_k, extmem_16m_plus_64k, extmem ); return extmem; } @@ -100,7 +103,7 @@ static unsigned int extmemsize_88 ( void ) { __asm__ __volatile__ ( REAL_CODE ( "int $0x15" ) : "=a" ( extmem ) : "a" ( 0x8800 ) ); - DBG ( "Extended memory size %d kB\n", extmem ); + DBG ( "INT 15,88 extended memory size %d kB\n", extmem ); return extmem; } @@ -149,14 +152,19 @@ static int meme820 ( struct memory_map *memmap ) { "d" ( SMAP ) : "memory" ); - if ( smap != SMAP ) + if ( smap != SMAP ) { + DBG ( "INT 15,e820 failed SMAP signature check\n" ); return -ENOTSUP; + } - if ( flags & CF ) + if ( flags & CF ) { + DBG ( "INT 15,e820 terminated on CF set\n" ); break; + } - DBG ( "E820 region [%llx,%llx) type %d\n", e820buf.start, - ( e820buf.start + e820buf.len ), ( int ) e820buf.type ); + DBG ( "INT 15,e820 region [%llx,%llx) type %d\n", + e820buf.start, ( e820buf.start + e820buf.len ), + ( int ) e820buf.type ); if ( e820buf.type != E820_TYPE_RAM ) continue; @@ -164,9 +172,18 @@ static int meme820 ( struct memory_map *memmap ) { region->end = e820buf.start + e820buf.len; region++; memmap->count++; - } while ( ( next != 0 ) && - ( memmap->count < ( sizeof ( memmap->regions ) / - sizeof ( memmap->regions[0] ) ) ) ); + + if ( memmap->count >= ( sizeof ( memmap->regions ) / + sizeof ( memmap->regions[0] ) ) ) { + DBG ( "INT 15,e820 too many regions returned\n" ); + /* Not a fatal error; what we've got so far at + * least represents valid regions of memory, + * even if we couldn't get them all. 
+ */ + break; + } + } while ( next != 0 ); + return 0; } @@ -179,18 +196,24 @@ void get_memmap ( struct memory_map *memmap ) { unsigned int basemem, extmem; int rc; + DBG ( "Fetching system memory map\n" ); + /* Clear memory map */ memset ( memmap, 0, sizeof ( *memmap ) ); /* Get base and extended memory sizes */ basemem = basememsize(); + DBG ( "FBMS base memory size %d kB\n", basemem ); extmem = extmemsize(); /* Try INT 15,e820 first */ - if ( ( rc = meme820 ( memmap ) ) == 0 ) + if ( ( rc = meme820 ( memmap ) ) == 0 ) { + DBG ( "Obtained system memory map via INT 15,e820\n" ); return; + } /* Fall back to constructing a map from basemem and extmem sizes */ + DBG ( "INT 15,e820 failed; constructing map\n" ); memmap->regions[0].end = ( basemem * 1024 ); memmap->regions[1].start = 0x100000; memmap->regions[1].end = 0x100000 + ( extmem * 1024 ); diff --git a/src/arch/i386/interface/pcbios/int13.c b/src/arch/i386/interface/pcbios/int13.c index a26dcff4..7e09fb5f 100644 --- a/src/arch/i386/interface/pcbios/int13.c +++ b/src/arch/i386/interface/pcbios/int13.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -601,6 +602,7 @@ void unregister_int13_drive ( struct int13_drive *drive ) { * Note that this function can never return success, by definition. */ int int13_boot ( unsigned int drive ) { + struct memory_map memmap; int status, signature; int discard_c, discard_d; int rc; @@ -634,6 +636,13 @@ int int13_boot ( unsigned int drive ) { return -ENOEXEC; } + /* Dump out memory map prior to boot, if memmap debugging is + * enabled. Not required for program flow, but we have so + * many problems that turn out to be memory-map related that + * it's worth doing. + */ + get_memmap ( &memmap ); + /* Jump to boot sector */ if ( ( rc = call_bootsector ( 0x0, 0x7c00, drive ) ) != 0 ) { DBG ( "INT 13 drive %02x boot returned\n", drive ); From 2303c45d6503ef938ef8a75829116ff174ae7f09 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Thu, 4 Oct 2007 14:46:56 +0100 Subject: [PATCH 82/84] Force a netdevice poll in net_tx() before attempting to transmit packet, to work around the problem whereby small TX rings get backed up because we haven't yet poll()ed for TX completions. --- src/net/netdevice.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/net/netdevice.c b/src/net/netdevice.c index 59a50205..f2778e88 100644 --- a/src/net/netdevice.c +++ b/src/net/netdevice.c @@ -424,6 +424,14 @@ struct net_device * find_netdev_by_location ( unsigned int bus_type, */ int net_tx ( struct io_buffer *iobuf, struct net_device *netdev, struct net_protocol *net_protocol, const void *ll_dest ) { + + /* Force a poll on the netdevice to (potentially) clear any + * backed-up TX completions. This is needed on some network + * devices to avoid excessive losses due to small TX ring + * sizes. + */ + netdev_poll ( netdev ); + return netdev->ll_protocol->tx ( iobuf, netdev, net_protocol, ll_dest ); } From e445c2c07c499edc8ead8cdc40305b5bef5adc56 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Wed, 10 Oct 2007 20:12:25 +0100 Subject: [PATCH 83/84] Ensure that empty e820 regions are skipped even at the end of the memory map. (We achieve this by setting CF on the last entry if it is zero-length; this avoids the need to look ahead to see at each entry if the *next* entry would be both the last entry and zero-length). This fixes the "0kB base memory" error message upon starting Windows 2003 on a SunFire X2100. 
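(For illustration only, not part of the patch: a rough C rendering of the empty-region handling described above. The real change is the 16-bit assembly in e820mangler.S below; the function and parameter names here are hypothetical.)

	#include <stdint.h>

	/* Decide what to do with one region returned by the mangled
	 * INT 15,e820 handler: "len" is the region length, "ebx" the
	 * continuation value returned for this call.
	 */
	enum e820_action { E820_RETURN, E820_SKIP, E820_TERMINATE };

	static enum e820_action handle_e820_entry ( uint64_t len, uint32_t ebx ) {
		if ( len != 0 )
			return E820_RETURN;	/* non-empty: return region to caller */
		if ( ebx != 0 )
			return E820_SKIP;	/* empty, more to come: reissue the call */
		/* Empty and last: terminate with CF set rather than
		 * handing the caller a zero-length region at the end
		 * of the map.
		 */
		return E820_TERMINATE;
	}
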
--- src/arch/i386/firmware/pcbios/e820mangler.S | 29 +++++++++++++-------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/arch/i386/firmware/pcbios/e820mangler.S b/src/arch/i386/firmware/pcbios/e820mangler.S index 21d1bb9a..e9328041 100644 --- a/src/arch/i386/firmware/pcbios/e820mangler.S +++ b/src/arch/i386/firmware/pcbios/e820mangler.S @@ -361,26 +361,33 @@ int15_e820: pushl %edx call split_e820 pushfw - /* Skip empty region checking if we've reached the end of the - * map or hit an error, to avoid a potential endless loop. - */ - jc 1f - testl %ebx, %ebx - jz 1f - /* Check for an empty region */ + /* If we've hit an error, exit immediately */ + jc 99f + /* If region is non-empty, return this region */ pushl %eax movl %es:8(%di), %eax orl %es:12(%di), %eax popl %eax - jnz 1f - /* Strip empty regions out of the returned map */ + jnz 99f + /* Region is empty. If this is not the end of the map, + * skip over this region. + */ + testl %ebx, %ebx + jz 1f popfw popl %edx popl %ecx popl %eax jmp int15_e820 - /* Restore flags from original INT 15,e820 call and return */ -1: popfw +1: /* Region is empty and this is the end of the map. Return + * with CF set to avoid placing an empty region at the end of + * the map. + */ + popfw + stc + pushfw +99: /* Restore flags from original INT 15,e820 call and return */ + popfw addr32 leal 12(%esp), %esp /* avoid changing flags */ lret $2 .size int15_e820, . - int15_e820 From 338d45a61e66e6afcddc41908b5daf664421f970 Mon Sep 17 00:00:00 2001 From: Michael Brown Date: Thu, 11 Oct 2007 00:43:21 +0100 Subject: [PATCH 84/84] Change ROM names to lower case. --- src/drivers/infiniband/arbel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/drivers/infiniband/arbel.c b/src/drivers/infiniband/arbel.c index 9ba81ca7..a57ade30 100644 --- a/src/drivers/infiniband/arbel.c +++ b/src/drivers/infiniband/arbel.c @@ -2117,8 +2117,8 @@ static void arbel_remove ( struct pci_device *pci ) { } static struct pci_device_id arbel_nics[] = { - PCI_ROM ( 0x15b3, 0x6282, "MT25218", "MT25218 HCA driver" ), - PCI_ROM ( 0x15b3, 0x6274, "MT25204", "MT25204 HCA driver" ), + PCI_ROM ( 0x15b3, 0x6282, "mt25218", "MT25218 HCA driver" ), + PCI_ROM ( 0x15b3, 0x6274, "mt25204", "MT25204 HCA driver" ), }; struct pci_driver arbel_driver __pci_driver = {