david/ipxe

[xen] Use version 1 grant tables by default

Using version 1 grant tables limits guests to using 16TB of grantable
RAM, and prevents the use of subpage grants.  Some versions of the Xen
hypervisor refuse to allow the grant table version to be set after the
first grant references have been created, so the loaded operating
system may be stuck with whatever choice we make here.  We therefore
currently use version 2 grant tables, since they give the most
flexibility to the loaded OS.
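
(For reference, the 16TB figure follows directly from the v1 entry
layout: the frame number occupies a 32-bit field and x86 pages are
4kB, so the grantable address space is capped at 2^32 frames x 4kB =
2^44 bytes = 16TB, and any frame number above 0xffffffff is
unrepresentable.)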

Current versions (7.2.0) of the Windows PV drivers have no support for
version 2 grant tables, and will merrily create version 1 entries in
what the hypervisor believes to be a version 2 table.  This causes
some confusion.

Avoid this problem by attempting to use version 1 tables, since
otherwise we may render Windows unable to boot.

Play nicely with other potential bootloaders by accepting either
version 1 or version 2 grant tables (if we are unable to set our
requested version).

Note that the use of version 1 tables on a 64-bit system introduces a
possible failure path in which a frame number cannot fit into the
32-bit field within the v1 structure.  This in turn introduces
additional failure paths into netfront_transmit() and
netfront_refill_rx().
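
(Concretely, that failure path reduces to a range check before the
frame number is written into a v1 entry. A minimal sketch of its
shape, using the names from xengrant_permit_access() in the diff
below:

	unsigned long frame = ( virt_to_phys ( page ) / PAGE_SIZE );

	if ( frame > 0xffffffffUL )
		return -ERANGE;	/* frame exceeds v1's 32-bit field */
	writel ( frame, &v1->frame );

Callers such as netfront_transmit() and netfront_refill_rx() must now
propagate or handle this error rather than assuming success.)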

Signed-off-by: Michael Brown <mcb30@ipxe.org>
Michael Brown 2014-08-13 17:23:11 +01:00
parent 3f39f9fcb3
commit be79ca535a
5 changed files with 351 additions and 111 deletions

File 1 of 5 (Xen HVM transport driver)

@@ -145,22 +145,24 @@ static void hvm_unmap_hypercall ( struct hvm_device *hvm ) {
  *
  * @v hvm		HVM device
  * @v space		Source mapping space
- * @v pages		Number of pages
+ * @v len		Length (must be a multiple of PAGE_SIZE)
  * @ret mmio		MMIO space address, or NULL on error
  */
 static void * hvm_ioremap ( struct hvm_device *hvm, unsigned int space,
-			    unsigned int pages ) {
+			    size_t len ) {
 	struct xen_add_to_physmap add;
 	struct xen_remove_from_physmap remove;
+	unsigned int pages = ( len / PAGE_SIZE );
 	physaddr_t mmio_phys;
 	unsigned int i;
-	size_t len;
 	void *mmio;
 	int xenrc;
 	int rc;
 
+	/* Sanity check */
+	assert ( ( len % PAGE_SIZE ) == 0 );
+
 	/* Check for available space */
-	len = ( pages * PAGE_SIZE );
 	if ( ( hvm->mmio_offset + len ) > hvm->mmio_len ) {
 		DBGC ( hvm, "HVM could not allocate %zd bytes of MMIO space "
 		       "(%zd of %zd remaining)\n", len,
@@ -218,12 +220,12 @@ static void * hvm_ioremap ( struct hvm_device *hvm, unsigned int space,
  *
  * @v hvm		HVM device
  * @v mmio		MMIO space address
- * @v pages		Number of pages
+ * @v len		Length (must be a multiple of PAGE_SIZE)
  */
-static void hvm_iounmap ( struct hvm_device *hvm, void *mmio,
-			  unsigned int pages ) {
+static void hvm_iounmap ( struct hvm_device *hvm, void *mmio, size_t len ) {
 	struct xen_remove_from_physmap remove;
 	physaddr_t mmio_phys = virt_to_phys ( mmio );
+	unsigned int pages = ( len / PAGE_SIZE );
 	unsigned int i;
 	int xenrc;
 	int rc;
@@ -258,7 +260,8 @@ static int hvm_map_shared_info ( struct hvm_device *hvm ) {
 	int rc;
 
 	/* Map shared info page */
-	hvm->xen.shared = hvm_ioremap ( hvm, XENMAPSPACE_shared_info, 1 );
+	hvm->xen.shared = hvm_ioremap ( hvm, XENMAPSPACE_shared_info,
+					PAGE_SIZE );
 	if ( ! hvm->xen.shared ) {
 		rc = -ENOMEM;
 		goto err_alloc;
@@ -273,7 +276,7 @@ static int hvm_map_shared_info ( struct hvm_device *hvm ) {
 
 	return 0;
 
-	hvm_iounmap ( hvm, hvm->xen.shared, 1 );
+	hvm_iounmap ( hvm, hvm->xen.shared, PAGE_SIZE );
  err_alloc:
 	return rc;
 }
@@ -286,7 +289,7 @@ static int hvm_map_shared_info ( struct hvm_device *hvm ) {
 static void hvm_unmap_shared_info ( struct hvm_device *hvm ) {
 
 	/* Unmap shared info page */
-	hvm_iounmap ( hvm, hvm->xen.shared, 1 );
+	hvm_iounmap ( hvm, hvm->xen.shared, PAGE_SIZE );
 }
 
 /**
@@ -296,56 +299,26 @@ static void hvm_unmap_shared_info ( struct hvm_device *hvm ) {
  * @ret rc		Return status code
  */
 static int hvm_map_grant ( struct hvm_device *hvm ) {
-	struct gnttab_query_size size;
-	struct gnttab_set_version version;
 	physaddr_t grant_phys;
-	size_t len;
-	int xenrc;
 	int rc;
 
-	/* Get grant table size */
-	size.dom = DOMID_SELF;
-	if ( ( xenrc = xengrant_query_size ( &hvm->xen, &size ) ) != 0 ) {
-		rc = -EXEN ( xenrc );
-		DBGC ( hvm, "HVM could not get grant table size: %s\n",
+	/* Initialise grant table */
+	if ( ( rc = xengrant_init ( &hvm->xen ) ) != 0 ) {
+		DBGC ( hvm, "HVM could not initialise grant table: %s\n",
 		       strerror ( rc ) );
-		goto err_query_size;
-	}
-	len = ( size.nr_frames * PAGE_SIZE );
-
-	/* Configure to use version 2 tables */
-	version.version = 2;
-	if ( ( xenrc = xengrant_set_version ( &hvm->xen, &version ) ) != 0 ) {
-		rc = -EXEN ( xenrc );
-		DBGC ( hvm, "HVM could not set version 2 grant table: %s\n",
-		       strerror ( rc ) );
-		goto err_set_version;
-	}
-	if ( version.version != 2 ) {
-		DBGC ( hvm, "HVM could not set version 2 grant table\n" );
-		rc = -ENOTTY;
-		goto err_set_version;
+		return rc;
 	}
 
 	/* Map grant table */
 	hvm->xen.grant.table = hvm_ioremap ( hvm, XENMAPSPACE_grant_table,
-					     size.nr_frames );
-	if ( ! hvm->xen.grant.table ) {
-		rc = -ENODEV;
-		goto err_ioremap;
-	}
+					     hvm->xen.grant.len );
+	if ( ! hvm->xen.grant.table )
+		return -ENODEV;
+
 	grant_phys = virt_to_phys ( hvm->xen.grant.table );
 	DBGC2 ( hvm, "HVM mapped grant table at [%08lx,%08lx)\n",
-		grant_phys, ( grant_phys + len ) );
-	hvm->xen.grant.count = ( len / sizeof ( hvm->xen.grant.table[0] ) );
+		grant_phys, ( grant_phys + hvm->xen.grant.len ) );
 
 	return 0;
-
-	hvm_iounmap ( hvm, hvm->xen.grant.table, size.nr_frames );
- err_ioremap:
- err_set_version:
- err_query_size:
-	return rc;
 }
 
 /**
@@ -354,11 +327,9 @@ static int hvm_map_grant ( struct hvm_device *hvm ) {
  *
  * @v hvm		HVM device
  */
 static void hvm_unmap_grant ( struct hvm_device *hvm ) {
-	size_t len;
 
 	/* Unmap grant table */
-	len = ( hvm->xen.grant.count * sizeof ( hvm->xen.grant.table[0] ) );
-	hvm_iounmap ( hvm, hvm->xen.grant.table, ( len / PAGE_SIZE ) );
+	hvm_iounmap ( hvm, hvm->xen.grant.table, hvm->xen.grant.len );
 }
 
 /**

File 2 of 5 (netfront network driver)

@@ -292,8 +292,13 @@ static int netfront_create_ring ( struct netfront_nic *netfront,
 	}
 
 	/* Grant access to shared ring */
-	xengrant_permit_access ( xen, ring->ref, xendev->backend_id, 0,
-				 ring->sring.raw );
+	if ( ( rc = xengrant_permit_access ( xen, ring->ref, xendev->backend_id,
+					     0, ring->sring.raw ) ) != 0 ) {
+		DBGC ( netfront, "NETFRONT %s could not permit access to "
+		       "%#08lx: %s\n", xendev->key,
+		       virt_to_phys ( ring->sring.raw ), strerror ( rc ) );
+		goto err_permit_access;
+	}
 
 	/* Publish shared ring reference */
 	if ( ( rc = netfront_write_num ( netfront, ring->ref_key,
@@ -309,6 +314,7 @@ static int netfront_create_ring ( struct netfront_nic *netfront,
 	netfront_rm ( netfront, ring->ref_key );
  err_write_num:
 	xengrant_invalidate ( xen, ring->ref );
+ err_permit_access:
 	free_dma ( ring->sring.raw, PAGE_SIZE );
  err_alloc:
 	return rc;
@@ -320,39 +326,53 @@ static int netfront_create_ring ( struct netfront_nic *netfront,
  * @v netfront		Netfront device
  * @v ring		Descriptor ring
  * @v iobuf		I/O buffer
+ * @v id		Buffer ID to fill in
  * @v ref		Grant reference to fill in
- * @ret id		Buffer ID
+ * @ret rc		Return status code
  *
  * The caller is responsible for ensuring that there is space in the
  * ring.
  */
-static unsigned int netfront_push ( struct netfront_nic *netfront,
-				    struct netfront_ring *ring,
-				    struct io_buffer *iobuf,
-				    grant_ref_t *ref ) {
+static int netfront_push ( struct netfront_nic *netfront,
+			   struct netfront_ring *ring, struct io_buffer *iobuf,
+			   uint16_t *id, grant_ref_t *ref ) {
 	struct xen_device *xendev = netfront->xendev;
 	struct xen_hypervisor *xen = xendev->xen;
-	unsigned int id;
+	unsigned int next_id;
+	unsigned int next_ref;
+	int rc;
 
 	/* Sanity check */
 	assert ( ! netfront_ring_is_full ( ring ) );
 
 	/* Allocate buffer ID */
-	id = ring->ids[ ( ring->id_prod++ ) & ( ring->count - 1 ) ];
-
-	/* Store I/O buffer */
-	assert ( ring->iobufs[id] == NULL );
-	ring->iobufs[id] = iobuf;
+	next_id = ring->ids[ ring->id_prod & ( ring->count - 1 ) ];
+	next_ref = ring->refs[next_id];
 
 	/* Grant access to I/O buffer page.  I/O buffers are naturally
 	 * aligned, so we never need to worry about crossing a page
 	 * boundary.
 	 */
-	*ref = ring->refs[id];
-	xengrant_permit_access ( xen, ring->refs[id], xendev->backend_id, 0,
-				 iobuf->data );
+	if ( ( rc = xengrant_permit_access ( xen, next_ref, xendev->backend_id,
+					     0, iobuf->data ) ) != 0 ) {
+		DBGC ( netfront, "NETFRONT %s could not permit access to "
+		       "%#08lx: %s\n", xendev->key,
+		       virt_to_phys ( iobuf->data ), strerror ( rc ) );
+		return rc;
+	}
 
-	return id;
+	/* Store I/O buffer */
+	assert ( ring->iobufs[next_id] == NULL );
+	ring->iobufs[next_id] = iobuf;
+
+	/* Consume buffer ID */
+	ring->id_prod++;
+
+	/* Return buffer ID and grant reference */
+	*id = next_id;
+	*ref = next_ref;
+
+	return 0;
 }
@@ -431,13 +451,15 @@ static void netfront_destroy_ring ( struct netfront_nic *netfront,
 
 /**
  * Refill receive descriptor ring
  *
- * @v netfront		Netfront device
+ * @v netdev		Network device
  */
-static void netfront_refill_rx ( struct netfront_nic *netfront ) {
+static void netfront_refill_rx ( struct net_device *netdev ) {
+	struct netfront_nic *netfront = netdev->priv;
 	struct xen_device *xendev = netfront->xendev;
 	struct io_buffer *iobuf;
 	struct netif_rx_request *request;
 	int notify;
+	int rc;
 
 	/* Do nothing if ring is already full */
 	if ( netfront_ring_is_full ( &netfront->rx ) )
@@ -455,13 +477,20 @@ static void netfront_refill_rx ( struct netfront_nic *netfront ) {
 
 		/* Add to descriptor ring */
 		request = RING_GET_REQUEST ( &netfront->rx_fring,
-					     netfront->rx_fring.req_prod_pvt++);
-		request->id = netfront_push ( netfront, &netfront->rx, iobuf,
-					      &request->gref );
+					     netfront->rx_fring.req_prod_pvt );
+		if ( ( rc = netfront_push ( netfront, &netfront->rx,
+					    iobuf, &request->id,
+					    &request->gref ) ) != 0 ) {
+			netdev_rx_err ( netdev, iobuf, rc );
+			break;
+		}
 		DBGC2 ( netfront, "NETFRONT %s RX id %d ref %d is %#08lx+%zx\n",
 			xendev->key, request->id, request->gref,
 			virt_to_phys ( iobuf->data ), iob_tailroom ( iobuf ) );
 
+		/* Move to next descriptor */
+		netfront->rx_fring.req_prod_pvt++;
+
 	} while ( ! netfront_ring_is_full ( &netfront->rx ) );
 
 	/* Push new descriptors and notify backend if applicable */
@@ -526,7 +555,7 @@ static int netfront_open ( struct net_device *netdev ) {
 	}
 
 	/* Refill receive descriptor ring */
-	netfront_refill_rx ( netfront );
+	netfront_refill_rx ( netdev );
 
 	/* Set link up */
 	netdev_link_up ( netdev );
@@ -614,6 +643,7 @@ static int netfront_transmit ( struct net_device *netdev,
 	struct xen_device *xendev = netfront->xendev;
 	struct netif_tx_request *request;
 	int notify;
+	int rc;
 
 	/* Check that we have space in the ring */
 	if ( netfront_ring_is_full ( &netfront->tx ) ) {
@@ -624,9 +654,11 @@ static int netfront_transmit ( struct net_device *netdev,
 
 	/* Add to descriptor ring */
 	request = RING_GET_REQUEST ( &netfront->tx_fring,
-				     netfront->tx_fring.req_prod_pvt++ );
-	request->id = netfront_push ( netfront, &netfront->tx, iobuf,
-				      &request->gref );
+				     netfront->tx_fring.req_prod_pvt );
+	if ( ( rc = netfront_push ( netfront, &netfront->tx, iobuf,
+				    &request->id, &request->gref ) ) != 0 ) {
+		return rc;
+	}
 	request->offset = ( virt_to_phys ( iobuf->data ) & ( PAGE_SIZE - 1 ) );
 	request->flags = NETTXF_data_validated;
 	request->size = iob_len ( iobuf );
@@ -634,6 +666,9 @@ static int netfront_transmit ( struct net_device *netdev,
 		xendev->key, request->id, request->gref,
 		virt_to_phys ( iobuf->data ), iob_len ( iobuf ) );
 
+	/* Consume descriptor */
+	netfront->tx_fring.req_prod_pvt++;
+
 	/* Push new descriptor and notify backend if applicable */
 	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY ( &netfront->tx_fring, notify );
 	if ( notify )
@@ -727,7 +762,6 @@ static void netfront_poll_rx ( struct net_device *netdev ) {
  * @v netdev		Network device
  */
 static void netfront_poll ( struct net_device *netdev ) {
-	struct netfront_nic *netfront = netdev->priv;
 
 	/* Poll for TX completions */
 	netfront_poll_tx ( netdev );
@@ -736,7 +770,7 @@ static void netfront_poll ( struct net_device *netdev ) {
 	netfront_poll_rx ( netdev );
 
 	/* Refill RX descriptor ring */
-	netfront_refill_rx ( netfront );
+	netfront_refill_rx ( netdev );
 }
 
 /** Network device operations */
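
(A design note on the netfront changes above: RING_GET_REQUEST() is no
longer passed a post-incremented req_prod_pvt. Because netfront_push()
can now fail, the producer index is advanced only once the push has
succeeded, so a failed push leaves the ring untouched. In outline,
with fring standing for either tx_fring or rx_fring:

	request = RING_GET_REQUEST ( &fring, fring.req_prod_pvt );
	if ( ( rc = netfront_push ( ... ) ) != 0 )
		return rc;	/* ring state unchanged on failure */
	...
	fring.req_prod_pvt++;	/* consume descriptor only on success */

The same commit-only-on-success ordering is applied to the buffer ID
allocation inside netfront_push() itself.)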

File 3 of 5 (Xen grant table structure definition)

@@ -27,9 +27,11 @@ struct xen_hypercall;
 /** A Xen grant table */
 struct xen_grant {
 	/** Grant table entries */
-	union grant_entry_v2 *table;
-	/** Number of grant table entries (must be a power of two) */
-	unsigned int count;
+	struct grant_entry_v1 *table;
+	/** Total grant table length */
+	size_t len;
+	/** Entry size shift (for later version tables) */
+	unsigned int shift;
	/** Number of grant table entries in use */
 	unsigned int used;
 	/** Most recently used grant reference */
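
(The new shift field lets the same mapping be indexed with either entry
size: in the Xen ABI a struct grant_entry_v1 is 8 bytes and a union
grant_entry_v2 is 16 bytes, so the entry count works out as

	entries = ( len / sizeof ( struct grant_entry_v1 ) ) >> shift;

with shift == 0 for a v1 table and shift == 1 for a v2 table. A
one-page (4kB) table thus holds 512 v1 entries or 256 v2 entries.)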

File 4 of 5 (grant table inline helpers)

@@ -10,10 +10,14 @@
 FILE_LICENCE ( GPL2_OR_LATER );
 
 #include <stdint.h>
+#include <stdlib.h>
 #include <ipxe/io.h>
 #include <ipxe/xen.h>
 #include <xen/grant_table.h>
 
+/** Induced failure rate (for testing) */
+#define XENGRANT_FAIL_RATE 0
+
 /**
  * Query grant table size
@@ -46,6 +50,90 @@ xengrant_set_version ( struct xen_hypervisor *xen,
 				 virt_to_phys ( version ), 1 );
 }
 
+/**
+ * Get grant table version
+ *
+ * @v xen		Xen hypervisor
+ * @v version		Version
+ * @ret xenrc		Xen status code
+ */
+static inline __attribute__ (( always_inline )) int
+xengrant_get_version ( struct xen_hypervisor *xen,
+		       struct gnttab_get_version *version ) {
+	return xen_hypercall_3 ( xen, __HYPERVISOR_grant_table_op,
+				 GNTTABOP_get_version,
+				 virt_to_phys ( version ), 1 );
+}
+
+/**
+ * Get number of grant table entries
+ *
+ * @v xen		Xen hypervisor
+ * @ret entries		Number of grant table entries
+ */
+static inline __attribute__ (( always_inline )) unsigned int
+xengrant_entries ( struct xen_hypervisor *xen ) {
+	return ( ( xen->grant.len / sizeof ( xen->grant.table[0] ) )
+		 >> xen->grant.shift );
+}
+
+/**
+ * Get grant table entry header
+ *
+ * @v xen		Xen hypervisor
+ * @v ref		Grant reference
+ * @ret hdr		Grant table entry header
+ */
+static inline __attribute__ (( always_inline )) struct grant_entry_header *
+xengrant_header ( struct xen_hypervisor *xen, grant_ref_t ref ) {
+	struct grant_entry_v1 *v1;
+
+	v1 = &xen->grant.table[ ref << xen->grant.shift ];
+	return ( container_of ( &v1->flags, struct grant_entry_header, flags ));
+}
+
+/**
+ * Get version 1 grant table entry
+ *
+ * @v hdr		Grant table entry header
+ * @ret v1		Version 1 grant table entry
+ */
+static inline __attribute__ (( always_inline )) struct grant_entry_v1 *
+xengrant_v1 ( struct grant_entry_header *hdr ) {
+	return ( container_of ( &hdr->flags, struct grant_entry_v1, flags ) );
+}
+
+/**
+ * Get version 2 grant table entry
+ *
+ * @v hdr		Grant table entry header
+ * @ret v2		Version 2 grant table entry
+ */
+static inline __attribute__ (( always_inline )) union grant_entry_v2 *
+xengrant_v2 ( struct grant_entry_header *hdr ) {
+	return ( container_of ( &hdr->flags, union grant_entry_v2, hdr.flags ));
+}
+
+/**
+ * Zero grant table entry
+ *
+ * @v xen		Xen hypervisor
+ * @v hdr		Grant table entry header
+ */
+static inline void xengrant_zero ( struct xen_hypervisor *xen,
+				   struct grant_entry_header *hdr ) {
+	uint32_t *dword = ( ( uint32_t * ) hdr );
+	unsigned int i = ( ( sizeof ( xen->grant.table[0] ) / sizeof ( *dword ))
+			   << xen->grant.shift );
+
+	while ( i-- )
+		writel ( 0, dword++ );
+}
+
 /**
  * Invalidate access to a page
  *
@@ -54,10 +142,10 @@ xengrant_set_version ( struct xen_hypervisor *xen,
  */
 static inline __attribute__ (( always_inline )) void
 xengrant_invalidate ( struct xen_hypervisor *xen, grant_ref_t ref ) {
-	union grant_entry_v2 *entry = &xen->grant.table[ref];
+	struct grant_entry_header *hdr = xengrant_header ( xen, ref );
 
 	/* Sanity check */
-	assert ( ( readw ( &entry->hdr.flags ) &
+	assert ( ( readw ( &hdr->flags ) &
 		   ( GTF_reading | GTF_writing ) ) == 0 );
 
 	/* This should apparently be done using a cmpxchg instruction.
@@ -65,7 +153,10 @@ xengrant_invalidate ( struct xen_hypervisor *xen, grant_ref_t ref ) {
 	 * mainly since our control flow generally does not permit
 	 * failure paths to themselves fail.
 	 */
-	writew ( 0, &entry->hdr.flags );
+	writew ( 0, &hdr->flags );
+
+	/* Leave reference marked as in-use (see xengrant_alloc()) */
+	writew ( DOMID_SELF, &hdr->domid );
 }
 
 /**
@@ -76,24 +167,63 @@ xengrant_invalidate ( struct xen_hypervisor *xen, grant_ref_t ref ) {
  * @v domid		Domain ID
  * @v subflags		Additional flags
  * @v page		Page start
+ * @ret rc		Return status code
  */
-static inline __attribute__ (( always_inline )) void
+static inline __attribute__ (( always_inline )) int
 xengrant_permit_access ( struct xen_hypervisor *xen, grant_ref_t ref,
 			 domid_t domid, unsigned int subflags, void *page ) {
-	union grant_entry_v2 *entry = &xen->grant.table[ref];
+	struct grant_entry_header *hdr = xengrant_header ( xen, ref );
+	struct grant_entry_v1 *v1 = xengrant_v1 ( hdr );
+	union grant_entry_v2 *v2 = xengrant_v2 ( hdr );
 	unsigned long frame = ( virt_to_phys ( page ) / PAGE_SIZE );
 
-	writew ( domid, &entry->full_page.hdr.domid );
-	if ( sizeof ( physaddr_t ) == sizeof ( uint64_t ) ) {
-		writeq ( frame, &entry->full_page.frame );
-	} else {
-		writel ( frame, &entry->full_page.frame );
+	/* Fail (for test purposes) if applicable */
+	if ( ( XENGRANT_FAIL_RATE > 0 ) &&
+	     ( random() % XENGRANT_FAIL_RATE ) == 0 ) {
+		return -EAGAIN;
 	}
+
+	/* Record frame number.  This may fail on a 64-bit system if
+	 * we are using v1 grant tables.  On a 32-bit system, there is
+	 * no way for this code path to fail (with either v1 or v2
+	 * grant tables); we allow the compiler to optimise the
+	 * failure paths away to save space.
	 */
+	if ( sizeof ( physaddr_t ) == sizeof ( uint64_t ) ) {
+		/* 64-bit system */
+		if ( xen->grant.shift ) {
+			/* Version 2 table: no possible failure */
+			writeq ( frame, &v2->full_page.frame );
+		} else {
+			/* Version 1 table: may fail if address above 16TB */
+			if ( frame > 0xffffffffUL )
+				return -ERANGE;
+			writel ( frame, &v1->frame );
+		}
+	} else {
+		/* 32-bit system */
+		if ( xen->grant.shift ) {
+			/* Version 2 table: no possible failure */
+			writel ( frame, &v2->full_page.frame );
+		} else {
+			/* Version 1 table: no possible failure */
+			writel ( frame, &v1->frame );
+		}
+	}
+
+	/* Record domain ID and flags */
+	writew ( domid, &hdr->domid );
 	wmb();
-	writew ( ( GTF_permit_access | subflags ), &entry->full_page.hdr.flags);
+	writew ( ( GTF_permit_access | subflags ), &hdr->flags );
 	wmb();
+
+	return 0;
 }
 
+extern int xengrant_init ( struct xen_hypervisor *xen );
 extern int xengrant_alloc ( struct xen_hypervisor *xen, grant_ref_t *refs,
 			    unsigned int count );
 extern void xengrant_free ( struct xen_hypervisor *xen, grant_ref_t *refs,
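
(The XENGRANT_FAIL_RATE hook added above gives a cheap way to exercise
these new failure paths without a machine holding more than 16TB: with
a non-zero value N, roughly one in N calls to xengrant_permit_access()
fails with -EAGAIN, e.g.

	#define XENGRANT_FAIL_RATE 16	/* fail ~1 in 16 grant attempts */

so the error handling in netfront_transmit() and netfront_refill_rx()
can be tested under normal operation.)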

File 5 of 5 (grant table implementation)

@@ -20,6 +20,7 @@
 FILE_LICENCE ( GPL2_OR_LATER );
 
 #include <stdint.h>
+#include <strings.h>
 #include <errno.h>
 #include <assert.h>
 #include <ipxe/io.h>
@@ -32,6 +33,106 @@ FILE_LICENCE ( GPL2_OR_LATER );
  *
  */
 
+/** Grant table version to try setting
+ *
+ * Using version 1 grant tables limits guests to using 16TB of
+ * grantable RAM, and prevents the use of subpage grants.  Some
+ * versions of the Xen hypervisor refuse to allow the grant table
+ * version to be set after the first grant references have been
+ * created, so the loaded operating system may be stuck with whatever
+ * choice we make here.  We therefore currently use version 2 grant
+ * tables, since they give the most flexibility to the loaded OS.
+ *
+ * Current versions (7.2.0) of the Windows PV drivers have no support
+ * for version 2 grant tables, and will merrily create version 1
+ * entries in what the hypervisor believes to be a version 2 table.
+ * This causes some confusion.
+ *
+ * Avoid this problem by attempting to use version 1 tables, since
+ * otherwise we may render Windows unable to boot.
+ *
+ * Play nicely with other potential bootloaders by accepting either
+ * version 1 or version 2 grant tables (if we are unable to set our
+ * requested version).
+ */
+#define XENGRANT_TRY_VERSION 1
+
+/**
+ * Initialise grant table
+ *
+ * @v xen		Xen hypervisor
+ * @ret rc		Return status code
+ */
+int xengrant_init ( struct xen_hypervisor *xen ) {
+	struct gnttab_query_size size;
+	struct gnttab_set_version set_version;
+	struct gnttab_get_version get_version;
+	struct grant_entry_v1 *v1;
+	union grant_entry_v2 *v2;
+	unsigned int version;
+	int xenrc;
+	int rc;
+
+	/* Get grant table size */
+	size.dom = DOMID_SELF;
+	if ( ( xenrc = xengrant_query_size ( xen, &size ) ) != 0 ) {
+		rc = -EXEN ( xenrc );
+		DBGC ( xen, "XENGRANT could not get table size: %s\n",
+		       strerror ( rc ) );
+		return rc;
+	}
+	xen->grant.len = ( size.nr_frames * PAGE_SIZE );
+
+	/* Set grant table version, if applicable */
+	set_version.version = XENGRANT_TRY_VERSION;
+	if ( ( xenrc = xengrant_set_version ( xen, &set_version ) ) != 0 ) {
+		rc = -EXEN ( xenrc );
+		DBGC ( xen, "XENGRANT could not set version %d: %s\n",
+		       XENGRANT_TRY_VERSION, strerror ( rc ) );
+		/* Continue; use whatever version is current */
+	}
+
+	/* Get grant table version */
+	get_version.dom = DOMID_SELF;
+	get_version.pad = 0;
+	if ( ( xenrc = xengrant_get_version ( xen, &get_version ) ) == 0 ) {
+		version = get_version.version;
+		switch ( version ) {
+		case 0:
+			/* Version not yet specified: will be version 1 */
+			version = 1;
+			break;
+		case 1:
+			/* Version 1 table: nothing special to do */
+			break;
+		case 2:
+			/* Version 2 table: configure shift appropriately */
+			xen->grant.shift = ( fls ( sizeof ( *v2 ) /
+						   sizeof ( *v1 ) ) - 1 );
+			break;
+		default:
+			/* Unsupported version */
+			DBGC ( xen, "XENGRANT detected unsupported version "
+			       "%d\n", version );
+			return -ENOTSUP;
+		}
+	} else {
+		rc = -EXEN ( xenrc );
+		DBGC ( xen, "XENGRANT could not get version (assuming v1): "
+		       "%s\n", strerror ( rc ) );
+		version = 1;
+	}
+
+	DBGC ( xen, "XENGRANT using v%d table with %d entries\n",
+	       version, xengrant_entries ( xen ) );
+	return 0;
+}
+
 /**
  * Allocate grant references
  *
@@ -42,22 +143,22 @@ FILE_LICENCE ( GPL2_OR_LATER );
  */
 int xengrant_alloc ( struct xen_hypervisor *xen, grant_ref_t *refs,
 		     unsigned int count ) {
-	union grant_entry_v2 *entry;
-	unsigned int mask = ( xen->grant.count - 1 );
+	struct grant_entry_header *hdr;
+	unsigned int entries = xengrant_entries ( xen );
+	unsigned int mask = ( entries - 1 );
 	unsigned int check = 0;
 	unsigned int avail;
 	unsigned int ref;
 
 	/* Fail unless we have enough references available */
-	avail = ( xen->grant.count - xen->grant.used -
-		  GNTTAB_NR_RESERVED_ENTRIES );
+	avail = ( entries - xen->grant.used - GNTTAB_NR_RESERVED_ENTRIES );
 	if ( avail < count ) {
 		DBGC ( xen, "XENGRANT cannot allocate %d references (only %d "
-		       "of %d available)\n", count, avail, xen->grant.count );
+		       "of %d available)\n", count, avail, entries );
 		return -ENOBUFS;
 	}
 	DBGC ( xen, "XENGRANT allocating %d references (from %d of %d "
-	       "available)\n", count, avail, xen->grant.count );
+	       "available)\n", count, avail, entries );
 
 	/* Update number of references used */
 	xen->grant.used += count;
@@ -66,24 +167,27 @@ int xengrant_alloc ( struct xen_hypervisor *xen, grant_ref_t *refs,
 	for ( ref = xen->grant.ref ; count ; ref = ( ( ref + 1 ) & mask ) ) {
 
 		/* Sanity check */
-		assert ( check++ < xen->grant.count );
+		assert ( check++ < entries );
 
 		/* Skip reserved references */
 		if ( ref < GNTTAB_NR_RESERVED_ENTRIES )
 			continue;
 
 		/* Skip in-use references */
-		entry = &xen->grant.table[ref];
-		if ( readw ( &entry->hdr.flags ) & GTF_type_mask )
+		hdr = xengrant_header ( xen, ref );
+		if ( readw ( &hdr->flags ) & GTF_type_mask )
 			continue;
-		if ( readw ( &entry->hdr.domid ) == DOMID_SELF )
+		if ( readw ( &hdr->domid ) == DOMID_SELF )
 			continue;
 
+		/* Zero reference */
+		xengrant_zero ( xen, hdr );
+
 		/* Mark reference as in-use.  We leave the flags as
 		 * empty (to avoid creating a valid grant table entry)
 		 * and set the domid to DOMID_SELF.
 		 */
-		writew ( DOMID_SELF, &entry->hdr.domid );
+		writew ( DOMID_SELF, &hdr->domid );
 		DBGC2 ( xen, "XENGRANT allocated ref %d\n", ref );
 
 		/* Record reference */
@@ -105,7 +209,7 @@ int xengrant_alloc ( struct xen_hypervisor *xen, grant_ref_t *refs,
  */
 void xengrant_free ( struct xen_hypervisor *xen, grant_ref_t *refs,
 		     unsigned int count ) {
-	union grant_entry_v2 *entry;
+	struct grant_entry_header *hdr;
 	unsigned int ref;
 	unsigned int i;
 
@@ -114,12 +218,11 @@ void xengrant_free ( struct xen_hypervisor *xen, grant_ref_t *refs,
 
 		/* Sanity check */
 		ref = refs[i];
-		assert ( ref < xen->grant.count );
+		assert ( ref < xengrant_entries ( xen ) );
 
-		/* Mark reference as unused */
-		entry = &xen->grant.table[ref];
-		writew ( 0, &entry->hdr.flags );
-		writew ( 0, &entry->hdr.domid );
+		/* Zero reference */
+		hdr = xengrant_header ( xen, ref );
+		xengrant_zero ( xen, hdr );
 		DBGC2 ( xen, "XENGRANT freed ref %d\n", ref );
 	}
 }
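
(One subtlety in the allocator: xengrant_alloc() treats a reference as
in use if its flags have a type set or its domid reads back as
DOMID_SELF. This is why xengrant_invalidate() clears the flags but
deliberately writes DOMID_SELF into the domid field, so that an
invalidated-but-still-allocated reference is never handed out again:

	/* in-use test from xengrant_alloc(), paraphrased */
	if ( readw ( &hdr->flags ) & GTF_type_mask )
		continue;	/* active grant entry */
	if ( readw ( &hdr->domid ) == DOMID_SELF )
		continue;	/* allocated but currently invalidated */

xengrant_free() then fully zeroes the entry, clearing the domid and
returning the reference to the free pool.)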