diff --git a/src/arch/x86/drivers/hyperv/hyperv.c b/src/arch/x86/drivers/hyperv/hyperv.c index b90937df..98c2b30c 100644 --- a/src/arch/x86/drivers/hyperv/hyperv.c +++ b/src/arch/x86/drivers/hyperv/hyperv.c @@ -40,6 +40,7 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); #include #include #include +#include #include #include #include @@ -299,6 +300,10 @@ static void hv_map_synic ( struct hv_hypervisor *hv ) { uint64_t siefp; uint64_t scontrol; + /* Zero SynIC message and event pages */ + memset ( hv->synic.message, 0, PAGE_SIZE ); + memset ( hv->synic.event, 0, PAGE_SIZE ); + /* Map SynIC message page */ simp = rdmsr ( HV_X64_MSR_SIMP ); simp &= ( PAGE_SIZE - 1 ); @@ -321,21 +326,14 @@ static void hv_map_synic ( struct hv_hypervisor *hv ) { } /** - * Unmap synthetic interrupt controller + * Unmap synthetic interrupt controller, leaving SCONTROL untouched * * @v hv Hyper-V hypervisor */ -static void hv_unmap_synic ( struct hv_hypervisor *hv ) { - uint64_t scontrol; +static void hv_unmap_synic_no_scontrol ( struct hv_hypervisor *hv ) { uint64_t siefp; uint64_t simp; - /* Disable SynIC */ - scontrol = rdmsr ( HV_X64_MSR_SCONTROL ); - scontrol &= ~HV_SCONTROL_ENABLE; - DBGC2 ( hv, "HV %p SCONTROL MSR is %#08llx\n", hv, scontrol ); - wrmsr ( HV_X64_MSR_SCONTROL, scontrol ); - /* Unmap SynIC event page */ siefp = rdmsr ( HV_X64_MSR_SIEFP ); siefp &= ( ( PAGE_SIZE - 1 ) & ~HV_SIEFP_ENABLE ); @@ -349,6 +347,24 @@ static void hv_unmap_synic ( struct hv_hypervisor *hv ) { wrmsr ( HV_X64_MSR_SIMP, simp ); } +/** + * Unmap synthetic interrupt controller + * + * @v hv Hyper-V hypervisor + */ +static void hv_unmap_synic ( struct hv_hypervisor *hv ) { + uint64_t scontrol; + + /* Disable SynIC */ + scontrol = rdmsr ( HV_X64_MSR_SCONTROL ); + scontrol &= ~HV_SCONTROL_ENABLE; + DBGC2 ( hv, "HV %p SCONTROL MSR is %#08llx\n", hv, scontrol ); + wrmsr ( HV_X64_MSR_SCONTROL, scontrol ); + + /* Unmap SynIC event and message pages */ + hv_unmap_synic_no_scontrol ( hv ); +} + /** * Enable 
synthetic interrupt * @@ -385,8 +401,12 @@ void hv_disable_sint ( struct hv_hypervisor *hv, unsigned int sintx ) { unsigned long msr = HV_X64_MSR_SINT ( sintx ); uint64_t sint; - /* Disable synthetic interrupt */ + /* Do nothing if interrupt is already disabled */ sint = rdmsr ( msr ); + if ( sint & HV_SINT_MASKED ) + return; + + /* Disable synthetic interrupt */ sint &= ~HV_SINT_AUTO_EOI; sint |= HV_SINT_MASKED; DBGC2 ( hv, "HV %p SINT%d MSR is %#08llx\n", hv, sintx, sint ); @@ -589,6 +609,7 @@ static void hv_remove ( struct root_device *rootdev ) { hv_free_pages ( hv, hv->hypercall, hv->synic.message, hv->synic.event, NULL ); free ( hv ); + rootdev_set_drvdata ( rootdev, NULL ); } /** Hyper-V root device driver */ @@ -603,6 +624,100 @@ struct root_device hv_root_device __root_device = { .driver = &hv_root_driver, }; +/** + * Quiesce system + * + */ +static void hv_quiesce ( void ) { + struct hv_hypervisor *hv = rootdev_get_drvdata ( &hv_root_device ); + unsigned int i; + + /* Do nothing if we are not running in Hyper-V */ + if ( ! hv ) + return; + + /* The "enlightened" portions of the Windows Server 2016 boot + * process will not cleanly take ownership of an active + * Hyper-V connection. Experimentation shows that the minimum + * requirement is that we disable the SynIC message page + * (i.e. zero the SIMP MSR). + * + * We cannot perform a full shutdown of the Hyper-V + * connection. Experimentation shows that if we disable the + * SynIC (i.e. zero the SCONTROL MSR) then Windows Server 2016 + * will enter an indefinite wait loop. + * + * Attempt to create a safe handover environment by resetting + * all MSRs except for SCONTROL. + * + * Note that we do not shut down our VMBus devices, since we + * may need to unquiesce the system and continue operation. + */ + + /* Disable all synthetic interrupts */ + for ( i = 0 ; i <= HV_SINT_MAX ; i++ ) + hv_disable_sint ( hv, i ); + + /* Unmap synthetic interrupt controller, leaving SCONTROL + * enabled (see above). 
+ */ + hv_unmap_synic_no_scontrol ( hv ); + + /* Unmap hypercall page */ + hv_unmap_hypercall ( hv ); + + DBGC ( hv, "HV %p quiesced\n", hv ); +} + +/** + * Unquiesce system + * + */ +static void hv_unquiesce ( void ) { + struct hv_hypervisor *hv = rootdev_get_drvdata ( &hv_root_device ); + uint64_t simp; + int rc; + + /* Do nothing if we are not running in Hyper-V */ + if ( ! hv ) + return; + + /* Experimentation shows that the "enlightened" portions of + * Windows Server 2016 will break our Hyper-V connection at + * some point during a SAN boot. Surprisingly it does not + * change the guest OS ID MSR, but it does leave the SynIC + * message page disabled. + * + * Our own explicit quiescing procedure will also disable the + * SynIC message page. We can therefore use the SynIC message + * page enable bit as a heuristic to determine when we need to + * reestablish our Hyper-V connection. + */ + simp = rdmsr ( HV_X64_MSR_SIMP ); + if ( simp & HV_SIMP_ENABLE ) + return; + + /* Remap hypercall page */ + hv_map_hypercall ( hv ); + + /* Remap synthetic interrupt controller */ + hv_map_synic ( hv ); + + /* Reset Hyper-V devices */ + if ( ( rc = vmbus_reset ( hv, &hv_root_device.dev ) ) != 0 ) { + DBGC ( hv, "HV %p could not unquiesce: %s\n", + hv, strerror ( rc ) ); + /* Nothing we can do */ + return; + } +} + +/** Hyper-V quiescer */ +struct quiescer hv_quiescer __quiescer = { + .quiesce = hv_quiesce, + .unquiesce = hv_unquiesce, +}; + /** * Probe timer * diff --git a/src/drivers/net/netvsc.c b/src/drivers/net/netvsc.c index d269cd63..5be52fb8 100644 --- a/src/drivers/net/netvsc.c +++ b/src/drivers/net/netvsc.c @@ -259,6 +259,15 @@ static int netvsc_revoke_buffer ( struct netvsc_device *netvsc, struct netvsc_revoke_buffer_message msg; int rc; + /* If the buffer's GPADL is obsolete (i.e. was created before + * the most recent Hyper-V reset), then we will never receive + * a response to the revoke message. 
Since the GPADL is + * already destroyed as far as the hypervisor is concerned, no + * further action is required. + */ + if ( netvsc_is_obsolete ( netvsc ) ) + return 0; + /* Construct message */ memset ( &msg, 0, sizeof ( msg ) ); msg.header.type = cpu_to_le32 ( buffer->revoke_type ); @@ -474,6 +483,14 @@ static int netvsc_transmit ( struct rndis_device *rndis, uint64_t xid; int rc; + /* If the device is obsolete (i.e. was opened before the most + * recent Hyper-V reset), then we will never receive transmit + * completions. Fail transmissions immediately to minimise + * the delay in closing and reopening the device. + */ + if ( netvsc_is_obsolete ( netvsc ) ) + return -EPIPE; + /* Sanity check */ assert ( iob_len ( iobuf ) >= sizeof ( *header ) ); assert ( iob_len ( iobuf ) == le32_to_cpu ( header->len ) ); @@ -823,6 +840,35 @@ static int netvsc_probe ( struct vmbus_device *vmdev ) { return rc; } +/** + * Reset device + * + * @v vmdev VMBus device + * @ret rc Return status code + */ +static int netvsc_reset ( struct vmbus_device *vmdev ) { + struct rndis_device *rndis = vmbus_get_drvdata ( vmdev ); + struct netvsc_device *netvsc = rndis->priv; + struct net_device *netdev = rndis->netdev; + int rc; + + /* A closed device holds no NetVSC (or RNDIS) state, so there + * is nothing to reset. + */ + if ( ! 
netdev_is_open ( netdev ) ) + return 0; + + /* Close and reopen device to reset any stale state */ + netdev_close ( netdev ); + if ( ( rc = netdev_open ( netdev ) ) != 0 ) { + DBGC ( netvsc, "NETVSC %s could not reopen: %s\n", + netvsc->name, strerror ( rc ) ); + return rc; + } + + return 0; +} + /** * Remove device * @@ -844,5 +890,6 @@ struct vmbus_driver netvsc_driver __vmbus_driver = { .type = VMBUS_TYPE ( 0xf8615163, 0xdf3e, 0x46c5, 0x913f, 0xf2, 0xd2, 0xf9, 0x65, 0xed, 0x0e ), .probe = netvsc_probe, + .reset = netvsc_reset, .remove = netvsc_remove, }; diff --git a/src/drivers/net/netvsc.h b/src/drivers/net/netvsc.h index 39eeb891..93192357 100644 --- a/src/drivers/net/netvsc.h +++ b/src/drivers/net/netvsc.h @@ -362,4 +362,19 @@ struct netvsc_device { int wait_rc; }; +/** + * Check if NetVSC device is obsolete + * + * @v netvsc NetVSC device + * @ret is_obsolete NetVSC device is obsolete + * + * Check if NetVSC device is obsolete (i.e. was opened before the most + * recent Hyper-V reset). 
+ */ +static inline __attribute__ (( always_inline )) int +netvsc_is_obsolete ( struct netvsc_device *netvsc ) { + + return vmbus_gpadl_is_obsolete ( netvsc->rx.gpadl ); +} + #endif /* _NETVSC_H */ diff --git a/src/include/ipxe/hyperv.h b/src/include/ipxe/hyperv.h index c61e2a08..9194a976 100644 --- a/src/include/ipxe/hyperv.h +++ b/src/include/ipxe/hyperv.h @@ -61,6 +61,9 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); /** Synthetic interrupt vector mask */ #define HV_SINT_VECTOR_MASK HV_SINT_VECTOR ( 0xff ) +/** Maximum synthetic interrupt number */ +#define HV_SINT_MAX 15 + /** Post message */ #define HV_POST_MESSAGE 0x005c diff --git a/src/include/ipxe/vmbus.h b/src/include/ipxe/vmbus.h index 26fc578c..68244185 100644 --- a/src/include/ipxe/vmbus.h +++ b/src/include/ipxe/vmbus.h @@ -479,6 +479,8 @@ struct vmbus_device { /** Hyper-V hypervisor */ struct hv_hypervisor *hv; + /** Channel instance */ + union uuid instance; /** Channel ID */ unsigned int channel; /** Monitor ID */ @@ -527,6 +529,12 @@ struct vmbus_driver { * @ret rc Return status code */ int ( * probe ) ( struct vmbus_device *vmdev ); + /** Reset device + * + * @v vmdev VMBus device + * @ret rc Return status code + */ + int ( * reset ) ( struct vmbus_device *vmdev ); /** Remove device * * @v vmdev VMBus device @@ -609,6 +617,23 @@ vmbus_unregister_pages ( struct vmbus_device *vmdev, list_del ( &pages->list ); } +extern unsigned int vmbus_obsolete_gpadl; + +/** + * Check if GPADL is obsolete + * + * @v gpadl GPADL ID + * @ret is_obsolete GPADL ID is obsolete + * + * Check if GPADL is obsolete (i.e. was created before the most recent + * Hyper-V reset). 
+ */ +static inline __attribute__ (( always_inline )) int +vmbus_gpadl_is_obsolete ( unsigned int gpadl ) { + + return ( gpadl <= vmbus_obsolete_gpadl ); +} + extern int vmbus_establish_gpadl ( struct vmbus_device *vmdev, userptr_t data, size_t len ); extern int vmbus_gpadl_teardown ( struct vmbus_device *vmdev, @@ -629,6 +654,7 @@ extern int vmbus_poll ( struct vmbus_device *vmdev ); extern void vmbus_dump_channel ( struct vmbus_device *vmdev ); extern int vmbus_probe ( struct hv_hypervisor *hv, struct device *parent ); +extern int vmbus_reset ( struct hv_hypervisor *hv, struct device *parent ); extern void vmbus_remove ( struct hv_hypervisor *hv, struct device *parent ); #endif /* _IPXE_VMBUS_H */ diff --git a/src/interface/hyperv/vmbus.c b/src/interface/hyperv/vmbus.c index 7915ddfe..45a7caec 100644 --- a/src/interface/hyperv/vmbus.c +++ b/src/interface/hyperv/vmbus.c @@ -50,6 +50,16 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); */ #define VMBUS_GPADL_MAGIC 0x18ae0000 +/** Current (i.e. most recently issued) GPADL ID */ +static unsigned int vmbus_gpadl = VMBUS_GPADL_MAGIC; + +/** Obsolete GPADL ID threshold + * + * When the Hyper-V connection is reset, any previous GPADLs are + * automatically rendered obsolete. + */ +unsigned int vmbus_obsolete_gpadl; + /** * Post message * @@ -281,12 +291,12 @@ int vmbus_establish_gpadl ( struct vmbus_device *vmdev, userptr_t data, uint64_t pfn[pfn_count]; } __attribute__ (( packed )) gpadlhdr; const struct vmbus_gpadl_created *created = &vmbus->message->created; - static unsigned int gpadl = VMBUS_GPADL_MAGIC; + unsigned int gpadl; unsigned int i; int rc; /* Allocate GPADL ID */ - gpadl++; + gpadl = ++vmbus_gpadl; /* Construct message */ memset ( &gpadlhdr, 0, sizeof ( gpadlhdr ) ); @@ -347,6 +357,15 @@ int vmbus_gpadl_teardown ( struct vmbus_device *vmdev, unsigned int gpadl ) { const struct vmbus_gpadl_torndown *torndown = &vmbus->message->torndown; int rc; + /* If GPADL is obsolete (i.e. 
was created before the most + * recent Hyper-V reset), then we will never receive a + * response to the teardown message. Since the GPADL is + * already destroyed as far as the hypervisor is concerned, no + * further action is required. + */ + if ( vmbus_gpadl_is_obsolete ( gpadl ) ) + return 0; + /* Construct message */ memset ( &teardown, 0, sizeof ( teardown ) ); teardown.header.type = cpu_to_le32 ( VMBUS_GPADL_TEARDOWN ); @@ -530,8 +549,7 @@ void vmbus_close ( struct vmbus_device *vmdev ) { } /* Tear down GPADL */ - if ( ( rc = vmbus_gpadl_teardown ( vmdev, - vmdev->gpadl ) ) != 0 ) { + if ( ( rc = vmbus_gpadl_teardown ( vmdev, vmdev->gpadl ) ) != 0 ) { DBGC ( vmdev, "VMBUS %s failed to tear down channel GPADL: " "%s\n", vmdev->dev.name, strerror ( rc ) ); /* We can't prevent the remote VM from continuing to @@ -1187,6 +1205,8 @@ static int vmbus_probe_channels ( struct hv_hypervisor *hv, &parent->children ); vmdev->dev.parent = parent; vmdev->hv = hv; + memcpy ( &vmdev->instance, &offer->instance, + sizeof ( vmdev->instance ) ); vmdev->channel = channel; vmdev->monitor = offer->monitor; vmdev->signal = ( offer->monitored ? 
@@ -1201,6 +1221,7 @@ static int vmbus_probe_channels ( struct hv_hypervisor *hv, } else if ( header->type == cpu_to_le32 ( VMBUS_ALL_OFFERS_DELIVERED ) ) { + /* End of offer list */ break; } else { @@ -1244,6 +1265,77 @@ static int vmbus_probe_channels ( struct hv_hypervisor *hv, return rc; } + +/** + * Reset channels + * + * @v hv Hyper-V hypervisor + * @v parent Parent device + * @ret rc Return status code + */ +static int vmbus_reset_channels ( struct hv_hypervisor *hv, + struct device *parent ) { + struct vmbus *vmbus = hv->vmbus; + const struct vmbus_message_header *header = &vmbus->message->header; + const struct vmbus_offer_channel *offer = &vmbus->message->offer; + const union uuid *type; + struct vmbus_device *vmdev; + unsigned int channel; + int rc; + + /* Post message */ + if ( ( rc = vmbus_post_empty_message ( hv, VMBUS_REQUEST_OFFERS ) ) !=0) + return rc; + + /* Collect responses */ + while ( 1 ) { + + /* Wait for response */ + if ( ( rc = vmbus_wait_for_any_message ( hv ) ) != 0 ) + return rc; + + /* Handle response */ + if ( header->type == cpu_to_le32 ( VMBUS_OFFER_CHANNEL ) ) { + + /* Parse offer */ + type = &offer->type; + channel = le32_to_cpu ( offer->channel ); + DBGC2 ( vmbus, "VMBUS %p offer %d type %s", + vmbus, channel, uuid_ntoa ( type ) ); + if ( offer->monitored ) + DBGC2 ( vmbus, " monitor %d", offer->monitor ); + DBGC2 ( vmbus, "\n" ); + + /* Do nothing with the offer; we already have all + * of the relevant state from the initial probe. 
+ */ + + } else if ( header->type == + cpu_to_le32 ( VMBUS_ALL_OFFERS_DELIVERED ) ) { + + /* End of offer list */ + break; + + } else { + DBGC ( vmbus, "VMBUS %p unexpected offer response type " + "%d\n", vmbus, le32_to_cpu ( header->type ) ); + return -EPROTO; + } + } + + /* Reset all devices */ + list_for_each_entry ( vmdev, &parent->children, dev.siblings ) { + if ( ( rc = vmdev->driver->reset ( vmdev ) ) != 0 ) { + DBGC ( vmdev, "VMBUS %s could not reset: %s\n", + vmdev->dev.name, strerror ( rc ) ); + /* Continue attempting to reset other devices */ + continue; + } + } + + return 0; +} + /** * Remove channels * @@ -1330,6 +1422,39 @@ int vmbus_probe ( struct hv_hypervisor *hv, struct device *parent ) { return rc; } +/** + * Reset Hyper-V virtual machine bus + * + * @v hv Hyper-V hypervisor + * @v parent Parent device + * @ret rc Return status code + */ +int vmbus_reset ( struct hv_hypervisor *hv, struct device *parent ) { + struct vmbus *vmbus = hv->vmbus; + int rc; + + /* Mark all existent GPADLs as obsolete */ + vmbus_obsolete_gpadl = vmbus_gpadl; + + /* Clear interrupt and monitor pages */ + memset ( vmbus->intr, 0, PAGE_SIZE ); + memset ( vmbus->monitor_in, 0, PAGE_SIZE ); + memset ( vmbus->monitor_out, 0, PAGE_SIZE ); + + /* Enable message interrupt */ + hv_enable_sint ( hv, VMBUS_MESSAGE_SINT ); + + /* Renegotiate protocol version */ + if ( ( rc = vmbus_negotiate_version ( hv ) ) != 0 ) + return rc; + + /* Reenumerate channels */ + if ( ( rc = vmbus_reset_channels ( hv, parent ) ) != 0 ) + return rc; + + return 0; +} + /** * Remove Hyper-V virtual machine bus *