#include "modulecall_compat.h"

/*
 * SysenterStateV45 --
 *
 *    Bundle of sysenter register values plus a validity flag.
 *
 *    NOTE(review): presumably the sysenter state layout that belongs to
 *    the V45 crosspage; nothing in the visible code references it --
 *    confirm against the users of this type.
 *
 *    The declaration used to read "typedef struct ... { ... };" without a
 *    declarator, which makes the typedef keyword meaningless (compilers
 *    warn that it is ignored).  The struct tag alone names the type.
 */
struct SysenterStateV45 {
   uint64       rsp;       // stack pointer value (presumably SYSENTER_ESP)
   uint64       rip;       // instruction pointer value (presumably SYSENTER_EIP)
   Selector     cs;        // code segment selector (presumably SYSENTER_CS)
   Bool         validEIP;  // presumably TRUE when rip holds a usable value
};


/*
 * hostMayUseSysenter: set by TaskProbeSysenterMSR() when the CPU reports
 *    the SEP feature bit and is not one of the old Intel parts that
 *    report it erroneously.
 *
 * hostSystemCall: array of system call MSR values (sysenter CS/RIP/RSP
 *    and syscall STAR/LSTAR/CSTAR/SFMASK) with one entry per online host
 *    logical cpu.  Allocated and filled with sentinel values by
 *    Task_SystemCallMSRInitialize(); NULL until then.
 */
static Bool hostMayUseSysenter = FALSE;
static SystemCallRegisters *hostSystemCall = NULL;


/*
 *----------------------------------------------------------------------
 * TaskProbeSysenterMSR --
 *
 *    This function computes sysenterMSR. Note that we assume that if a
 *    host is ever going to use the sysenter MSRs, they will have been
 *    initialized here; if some future host lazily sets up the sysenter
 *    MSRs, this code will cause trouble.
 *
 *----------------------------------------------------------------------
 */
static void
TaskProbeSysenterMSR(void)
{
   CPUOem cpuOem = CPUID_GetOEM();
   uint32 features = CPUID_GetFeatures();
   uint32 version = CPUID_GetVersion();

   /*
    * Some old intel CPUs set the SEP bit, even though they are not
    * capable of sysenter. In particular, CPUs with family of 6, but
    * with the whole version string < 0x633 (family, model, stepping).
    * This check is also performed in vmx/vmcore/cpuid.c.
    */
   if (cpuOem == CPUID_OEM_INTEL && CPUID_FAMILY(version) == 6 &&
       (CPUID_MODEL(version) < 3 ||
        (CPUID_MODEL(version) == 3 && CPUID_STEPPING(version) < 3))) {

      Warning("VMMON: CPU too old to use sysenter (ok).\n");
   } else if (features & CPUID_FEATURE_COMMON_ID1EDX_SEP) {
      /* The CPU appears to support sysenter. */
      hostMayUseSysenter = 1;
   }
}


/*
 *------------------------------------------------------------------------------
 *
 *  Task_SystemCallMSRInitialize --
 *
 *    Probe the CPU for sysenter support, then allocate the
 *    per-logical-cpu table of system call MSR values (hostSystemCall)
 *    and fill every entry with the sentinel values.
 *
 * Results:
 *    TRUE  -> Table allocated and initialized.
 *    FALSE -> Unable to allocate memory for the table;
 *             system cannot continue to load.
 *
 * Side effects:
 *    hostSystemCall is overwritten with the allocation result;
 *    TaskProbeSysenterMSR() may set hostMayUseSysenter.
 *
 *------------------------------------------------------------------------------
 */

Bool
Task_SystemCallMSRInitialize(void)
{
   const unsigned       numCpus = HostIF_NumOnlineLogicalCPUs();
   const unsigned       bytes   = numCpus * sizeof(SystemCallRegisters);
   SystemCallRegisters *entry;
   SystemCallRegisters *tableEnd;

   TaskProbeSysenterMSR();

   /*
    * Some linux kernels panic when allocating > 128Kb: ensure an
    * attempt is not made to run with ~1365 host cpus.
    */
   ASSERT(bytes <= 131072);

   hostSystemCall = (SystemCallRegisters *)HostIF_AllocKernelMem(bytes, TRUE);
   if (hostSystemCall == NULL) {
      Warning("system call msr initialization failure; "
              "unable to allocate space for values\n");
      return FALSE;
   }

   /* Mark every cpu's entry as "not yet read from hardware". */
   tableEnd = hostSystemCall + numCpus;
   for (entry = hostSystemCall; entry != tableEnd; ++entry) {
      entry->sysenterCS  = SYSENTER_SENTINEL_CS;
      entry->sysenterRIP = SYSENTER_SENTINEL_RIP;
      entry->sysenterRSP = SYSENTER_SENTINEL_RSP;
      entry->star        = SYSCALL_SENTINEL_STAR;
      entry->lstar       = SYSCALL_SENTINEL_LSTAR;
      entry->cstar       = SYSCALL_SENTINEL_CSTAR;
      entry->sfmask      = SYSCALL_SENTINEL_SFMASK;
   }
   return TRUE;
}


/*
 *------------------------------------------------------------------------------
 *
 *  Task_SystemCallMSRReset --
 *
 *    If the per-logical-cpu system call MSR table exists, pass it to
 *    HostIF_SystemCallReset() together with the current number of online
 *    logical cpus, then free it.
 *
 * Results:
 *    None.
 *
 * Side effects:
 *    hostSystemCall is freed and set back to NULL, so calling this
 *    function again (or before initialization) is a no-op.
 *
 *------------------------------------------------------------------------------
 */

void
Task_SystemCallMSRReset(void)
{
   if (hostSystemCall != NULL) {
      HostIF_SystemCallReset(HostIF_NumOnlineLogicalCPUs(), hostSystemCall);
      HostIF_FreeKernelMem(hostSystemCall);
      /*
       * Do not leave a dangling pointer behind: without this, a second
       * reset would hand freed memory to HostIF_SystemCallReset() and
       * free it again.
       */
      hostSystemCall = NULL;
   }
}


/*
 * SystemcallSetMSR --
 *
 *    Write newVal to MSR msrNum, skipping the write when the caller's
 *    idea of the current value (oldVal) already matches.
 */
static INLINE void
SystemcallSetMSR(uint32 msrNum, uint64 oldVal, uint64 newVal)
{
   if (oldVal == newVal) {
      /* Nothing to change; avoid the MSR write. */
      return;
   }
   HostIF_WRMSR(msrNum, newVal);
}

/*
 * SysenterSetMSRCS --
 *
 *    Write newCS to MSR_SYSENTER_CS, skipping the write when it equals
 *    oldCS.
 */
static INLINE void
SysenterSetMSRCS(Selector oldCS, Selector newCS)
{
   if (oldCS == newCS) {
      /* Selector unchanged; avoid the MSR write. */
      return;
   }
   HostIF_WRMSR(MSR_SYSENTER_CS, newCS);
}


/*
 * SysenterValidateMSR --
 *
 *    Read MSR msrNum and compare it with the expected value.  On a
 *    mismatch, log a warning and rewrite the MSR with the expected value.
 */
static INLINE void
SysenterValidateMSR(uint32 msrNum, uint64 expected)
{
   const uint64 actual = HostIF_RDMSR(msrNum);

   if (actual == expected) {
      return;
   }

   Warning("sysenter MSR %#x is %#"FMT64"x; expected %#"FMT64"x\n",
           msrNum, actual, expected);
   HostIF_WRMSR(msrNum, expected);
}


/*
 *-----------------------------------------------------------------------------
 *
 * SysenterValidateVmmStateV5  --
 *
 *     Check that the physical sysenter MSRs hold the values recorded in
 *     vmmState->hw.  Any MSR that differs is reported with a warning and
 *     rewritten (see SysenterValidateMSR).  The EIP and ESP MSRs are only
 *     checked when the recorded CS is non-zero.
 *
 *     Diagnostic only: does nothing unless vmx86_debug is set.
 *
 *-----------------------------------------------------------------------------
 */

static INLINE_SINGLE_CALLER void
SysenterValidateVmmStateV5(const SysenterStateV5 *vmmState)
{
   if (!vmx86_debug) {
      return;
   }

   SysenterValidateMSR(MSR_SYSENTER_CS, vmmState->hw.sysenterCS);
   if (vmmState->hw.sysenterCS == 0) {
      /* With a zero CS the EIP/ESP values are not validated. */
      return;
   }

   SysenterValidateMSR(MSR_SYSENTER_EIP, vmmState->hw.sysenterRIP);
   SysenterValidateMSR(MSR_SYSENTER_ESP, vmmState->hw.sysenterRSP);
}


/*
 *-----------------------------------------------------------------------------
 *
 * SysenterSaveHostStateV5  --
 *
 *     If required, read the host's sysenter MSRs (ESP, EIP, CS) into
 *     hostMSR before returning to the monitor.
 *
 *     The read is required when vmmState does not allow caching
 *     (sysenterCacheMSR is false) or when hostMSR still holds the
 *     sentinel CS, i.e. has not been filled in yet.  Otherwise the
 *     cached values in hostMSR are left as they are.
 *
 *-----------------------------------------------------------------------------
 */

static INLINE_SINGLE_CALLER void
SysenterSaveHostStateV5(SystemCallRegisters   *hostMSR,
                        const SysenterStateV5 *vmmState)
{
   const Bool mustReadHW = !vmmState->sysenterCacheMSR ||
                           (hostMSR->sysenterCS == SYSENTER_SENTINEL_CS);

   if (UNLIKELY(mustReadHW)) {
      hostMSR->sysenterRSP = HostIF_RDMSR(MSR_SYSENTER_ESP);
      hostMSR->sysenterRIP = HostIF_RDMSR(MSR_SYSENTER_EIP);
      hostMSR->sysenterCS  = (Selector)HostIF_RDMSR(MSR_SYSENTER_CS);
   }
}

   
/*
 *------------------------------------------------------------------------------
 *
 * TaskInitGuestSysenter  --
 *
 *    Reset one crosspage sysenter state: no CS requested, all hardware
 *    shadow values zero, and MSR caching enabled.
 *
 * Results:
 *    None.
 *
 * Side effects:
 *    *sysenter is overwritten.
 *
 *------------------------------------------------------------------------------
 */

static void
TaskInitGuestSysenter(SysenterStateV5 *sysenter)
{
   /* Policy fields first ... */
   sysenter->sysenterCacheMSR = TRUE;
   sysenter->requestedCS      = 0;

   /* ... then the shadow of the hardware MSR values. */
   sysenter->hw.sysenterRSP   = 0;
   sysenter->hw.sysenterRIP   = 0;
   sysenter->hw.sysenterCS    = 0;
}


/*
 *------------------------------------------------------------------------------
 *
 * Task_InitCrosspage_V55  --
 *
 *    Map and initialize the crosspage of every VCPU named in initParams.
 *    For each VCPU the page is mapped into the driver, remembered in
 *    vm->crosspage[], and its machine address, host linear address, IRQ
 *    relocation offsets and call state are filled in.
 *
 * Results:
 *    0 on success
 *    1 if a crosspage could not be mapped
 *    2 if the MPN of a crosspage could not be looked up
 *
 * Side effects:
 *    vm->crosspage[] entries are set, including the entry of a VCPU
 *    whose MPN lookup subsequently fails.
 *
 *------------------------------------------------------------------------------
 */

template <class VMCrossPage> static int
Task_InitCrosspage_V55(VMDriver *vm,          // IN
                       InitBlock *initParams) // IN: Initial params from the VM
{
   ASSERT(sizeof(VMCrossPage) < PAGE_SIZE);
   ASSERT(MODULECALL_CROSS_PAGE_LEN == 1);

   for (Vcpuid vcpuid = 0; vcpuid < initParams->numVCPUs; vcpuid++) {
      void *const  userPage = (void *)(uintptr_t)initParams->crosspage[vcpuid];
      LA           mappedLA;   /* filled in by the mapping call; unused here */
      VMCrossPage *page;
      MPN          pageMPN;

      page = (VMCrossPage*)HostIF_MapCrossPage(vm, userPage, &mappedLA);
      if (page == NULL) {
         return 1;
      }
      vm->crosspage[vcpuid] = page;

      pageMPN = HostIF_LookupUserMPN(vm, userPage);
      if ((int64)pageMPN <= 0) {
         return 2;
      }

      /* Where the page lives: machine address and host linear address. */
      page->crosspageMA     = MPN_2_MA(pageMPN);
      page->hostCrossPageLA = (LA64)(uintptr_t)page;

      /* Initial interrupt relocation and call state. */
      page->irqRelocateOffset[0]  = IRQ_HOST_INTR1_BASE;
      page->irqRelocateOffset[1]  = IRQ_HOST_INTR2_BASE;
      page->userCallRequest       = MODULECALL_USERCALL_NONE;
      page->moduleCallInterrupted = FALSE;
      page->tscAdjustment         = 0;
   }

   return 0;
}


/*
 *------------------------------------------------------------------------------
 *
 * Task_InitCrosspage_V5  --
 *
 *    Map and initialize the crosspage of every VCPU named in initParams.
 *    For each VCPU the page is mapped into the driver, remembered in
 *    vm->crosspage[], and its machine address, host linear address, IRQ
 *    relocation offsets, call state and both sysenter states (vmm32 and
 *    vmm64) are filled in.
 *
 * Results:
 *    0 on success
 *    1 if a crosspage could not be mapped
 *    2 if the MPN of a crosspage could not be looked up
 *
 * Side effects:
 *    vm->crosspage[] entries are set, including the entry of a VCPU
 *    whose MPN lookup subsequently fails.
 *
 *------------------------------------------------------------------------------
 */

template <class VMCrossPage> static int
Task_InitCrosspage_V5(VMDriver *vm,          // IN
                      InitBlock *initParams) // IN: Initial params from the VM
{
   ASSERT(sizeof(VMCrossPage) < PAGE_SIZE);
   ASSERT(MODULECALL_CROSS_PAGE_LEN == 1);

   for (Vcpuid vcpuid = 0; vcpuid < initParams->numVCPUs; vcpuid++) {
      void *const  userPage = (void *)(uintptr_t)initParams->crosspage[vcpuid];
      LA           mappedLA;   /* filled in by the mapping call; unused here */
      VMCrossPage *page;
      MPN          pageMPN;

      page = (VMCrossPage*)HostIF_MapCrossPage(vm, userPage, &mappedLA);
      if (page == NULL) {
         return 1;
      }
      vm->crosspage[vcpuid] = page;

      pageMPN = HostIF_LookupUserMPN(vm, userPage);
      if ((int64)pageMPN <= 0) {
         return 2;
      }

      /* Where the page lives: machine address and host linear address. */
      page->crosspageMA     = MPN_2_MA(pageMPN);
      page->hostCrossPageLA = (LA64)(uintptr_t)page;

      /* Initial interrupt relocation and call state. */
      page->irqRelocateOffset[0]  = IRQ_HOST_INTR1_BASE;
      page->irqRelocateOffset[1]  = IRQ_HOST_INTR2_BASE;
      page->userCallRequest       = MODULECALL_USERCALL_NONE;
      page->moduleCallInterrupted = FALSE;
      page->tscAdjustment         = 0;

      /* Both monitor flavours start with a clean sysenter state. */
      TaskInitGuestSysenter(&page->vmm32Sysenter);
      TaskInitGuestSysenter(&page->vmm64Sysenter);
   }

   return 0;
}


/*
 *------------------------------------------------------------------------------
 *
 * Task_InitCrosspage_V45 --
 *
 *    Initialize the VMCrossPageV45 crosspage of every VCPU named in
 *    initParams.  Each page is mapped into the driver and recorded in
 *    vm->crosspage[].  On VM_X86_64 builds the 64-bit host context and
 *    the small switch GDT (null, 64-bit CS, 32-bit CS, 32-bit DS) are
 *    set up; on other builds the 32-bit host context is filled in.
 *
 *    ws451 only matters on VM_X86_64 builds: it selects which of the
 *    switch GDT slots 1 and 2 holds the 32-bit code descriptor and which
 *    holds the 64-bit one (non-zero: 32-bit in slot 1, 64-bit in slot 2;
 *    zero: the other way round).
 *
 * Results:
 *    0 on success
 *    1 if a crosspage could not be mapped
 *    2 if the MPN of a crosspage could not be looked up (VM_X86_64 only)
 *
 * Side effects:
 *    vm->crosspage[] entries are set, including the entry of a VCPU
 *    whose MPN lookup subsequently fails.
 *  
 *------------------------------------------------------------------------------
 */

static int 
Task_InitCrosspage_V45(VMDriver *vm,          // IN
                       InitBlock *initParams, // IN: Initial params from the VM 
		       int ws451)             // IN: selects switch GDT slot layout
{
   Vcpuid vcpuid;

   ASSERT(sizeof(VMCrossPageV45) < PAGE_SIZE);
   ASSERT(MODULECALL_CROSS_PAGE_LEN == 1);
   for (vcpuid = 0; vcpuid < initParams->numVCPUs;  vcpuid++) {
      uintptr_t    cr3reg;
      LA           linearAddr;
      uint16       trReg;
      void        *iparm = (void *)(uintptr_t)initParams->crosspage[vcpuid];
      VMCrossPageV45  *p = (VMCrossPageV45*)HostIF_MapCrossPage(vm, iparm, &linearAddr);

      if (p == NULL) {
         return 1;
      }

      vm->crosspage[vcpuid] = p;
      /* Capture the current host CR3 and task register for the host context. */
      GET_CR3(cr3reg);
      GET_TR(trReg);
#if defined(VM_X86_64)
      {
         /* Same user address as iparm above, looked up for its MPN. */
         MPN crossPageMPN = HostIF_LookupUserMPN(vm, (void *)(uintptr_t)initParams->crosspage[vcpuid]);
         /* Switch GDT slot indices of the 32-bit and 64-bit code descriptors. */
         int c32 = ws451 ? 1 : 2;
	 int c64 = ws451 ? 2 : 1;
      
         if ((int64)crossPageMPN <= 0) {
            return 2;
         }

         p->hostContext64.context.cr3 = cr3reg;
         p->hostVA64                  = (uintptr_t)p;

         /*
          * In order to make the transition to legacy mode, we must first turn
          * paging off on the processor. For this we need to be executing
          * code on an identity mapped page (the crosspage).
          */
         p->crosspageLinearAddr = (LA32)MPN_2_MA(crossPageMPN);

         /* Make sure there is no truncation */
         ASSERT((VA)p->crosspageLinearAddr == MPN_2_MA(crossPageMPN));      
         p->hostContext64VA = (uintptr_t)&p->hostContext64;

         /* XXX hack the initial monitor idt */
         p->monContext.idtr.dtr.offset = 0;
         p->monContext.idtr.dtr.limit = sizeof(Gate)*0x100 - 1;

         /* The switch GDT lives inside the crosspage and has four entries. */
         p->hostContext64.switchGdtr64.offset = p->crosspageLinearAddr + 
            offsetof(VMCrossPageV45, switchGdt);
         p->hostContext64.switchGdtr64.limit = sizeof(Descriptor) * 4 - 1;

         /*
          * Set up data for worldswitch jumping between modes.  The
          * data is used in the dynamically loaded worldswitch code.
          * The offsets are set in
          * $VMTREE/vmcore/vmx/main/monitorHosted32.c:PatchVmm64Info().
          */
         p->hostContext64.jump64Code.selector = MAKE_SELECTOR(c64, SELECTOR_GDT, 0);
         p->hostContext64.jump32Code.selector = MAKE_SELECTOR(c32, SELECTOR_GDT, 0);
         p->hostContext64.switchDS            = MAKE_SELECTOR(3, SELECTOR_GDT, 0);
         p->hostContext64.tr                  = trReg;
         
         // null descriptor
         p->switchGdt[0].word[0] = 0;
         p->switchGdt[0].word[1] = 0;

         // 64 bit CS descriptor
         Desc_SetDescriptor(&p->switchGdt[c64].desc, 0, (VA32)0xfffff, CODE_DESC, 
                            1, 0, 1, 0, 1);
         // Set the L bit to indicate running 64 bit mode.
         p->switchGdt[c64].desc.longmode = 1;

         // 32 bit compatibility mode CS descriptor
         Desc_SetDescriptor(&p->switchGdt[c32].desc, 0, (VA32)0xfffff, CODE_DESC, 
                            1, 0, 1, 1, 1);
         
         // 32 bit compatibility mode DS descriptor
         Desc_SetDescriptor(&p->switchGdt[3].desc, 0, (VA32)0xfffff, DATA_DESC, 
                            1, 0, 1, 1, 1);
      }
#else
      {
         DTR32 idtrReg;

         /* 32-bit host: record CR3, TR and the mapping's linear address. */
         GET_IDT(idtrReg);
         p->hostContext.task.cr3 = cr3reg;
         p->crosspageLinearAddr  = linearAddr;
         p->hostContextVA        = (uintptr_t)&p->hostContext;
         p->monContext.idtr.dtr  = idtrReg; /* irrelevant */
         p->hostContext.tr       = trReg;
      }
#endif

      /* Initial interrupt relocation and call state, common to both builds. */
      p->irqRelocateOffset[0]  = IRQ_HOST_INTR1_BASE; 
      p->irqRelocateOffset[1]  = IRQ_HOST_INTR2_BASE;
      p->userCallRequest       = MODULECALL_USERCALL_NONE;
      p->moduleCallInterrupted = FALSE;
      p->tscAdjustment         = 0;
   }
   return 0;
}


#ifndef VM_X86_64
/*
 *------------------------------------------------------------------------------
 *
 * Task_InitCrosspage_V4 --
 *
 *    Initialize the VMCrossPageV4 crosspage of every VCPU named in
 *    initParams.  The user pointer of each crosspage is translated to a
 *    driver pointer, recorded in vm->crosspage[], and the host context
 *    (CR3, TR), addresses, IDT register, IRQ relocation offsets and call
 *    state are filled in.
 *
 * Results:
 *    0 on success
 *    1 if a crosspage pointer could not be translated
 *
 * Side effects:
 *    vm->crosspage[] entries are set.
 *
 *------------------------------------------------------------------------------
 */

static int
Task_InitCrosspage_V4(VMDriver *vm,          // IN
                      InitBlock *initParams) // IN: Initial params from the VM
{
   const LA kernelBaseLA = HOST_KERNEL_VA_2_LA(0);
   Vcpuid   vcpuid;

   ASSERT(sizeof (VMCrossPageV4) < PAGE_SIZE);
   ASSERT(MODULECALL_CROSS_PAGE_LEN==1);

   for (vcpuid = 0; vcpuid < initParams->numVCPUs; vcpuid++) {
      uint32         cr3reg;
      DTR32          idtrReg;
      uint16         trReg;
      VMCrossPageV4 *page;

      /*
       * The physical memory associated to the crosspage has already been
       * locked in MonitorAllocateMemory() --hpreg
       */
      page = (VMCrossPageV4*)HostIF_UserToDriverPtr(vm, initParams->crosspage[vcpuid]);
      if (page == NULL) {
         return 1;
      }
      vm->crosspage[vcpuid] = page;

      /* Host paging root and the various addresses of the page. */
      GET_CR3(cr3reg);
      page->hostContext.task.cr3 = cr3reg;
      page->hostVA               = (VA32)page;
      page->hostContextVA        = (uintptr_t)&page->hostContext;
      page->crosspageLinearAddr  = kernelBaseLA + (uintptr_t)page;

      GET_IDT(idtrReg);
      page->monContext.idtr.dtr = idtrReg; /* irrelevant */

      GET_TR(trReg);
      page->hostContext.tr = trReg;

      /* Initial interrupt relocation and call state. */
      page->irqRelocateOffset[0]  = IRQ_HOST_INTR1_BASE;
      page->irqRelocateOffset[1]  = IRQ_HOST_INTR2_BASE;
      page->userCallRequest       = MODULECALL_USERCALL_NONE;
      page->moduleCallInterrupted = FALSE;
   }

   return 0;
}
#endif


/*
 *-----------------------------------------------------------------------------
 *
 * Task_Switch_S1B1 --
 *
 *      Switches from the host context into the monitor
 *      context, using the VMCrossPageS1B1 crosspage layout. Think
 *      of it as a coroutine switch that changes
 *      not only the registers, but also the address space
 *      and all the hardware state.
 *
 *      The host's control registers, debug registers, GDT/LDT and
 *      FS/GS state are saved before the switch and restored after it.
 *      If the monitor came back because of an interrupt
 *      (crosspage->isMCIntr()), that interrupt vector is raised on the
 *      host before interrupts are re-enabled.
 *
 * Results:
 *      None; the function returns void.
 *
 * Side effects:
 *      Jump to the other side. Has no direct effect on the
 *      host-visible state except that it might generate an interrupt.
 *      vm->currentHostCpu[vcpuid] holds the current physical cpu for the
 *      duration of the switch and is INVALID_HOST_CPU afterwards.
 *      Panics on an interrupt vector that is not handled below.
 *
 *-----------------------------------------------------------------------------
 */

void 
Task_Switch_S1B1(VMDriver *vm,  // IN
                Vcpuid vcpuid) // IN
{
   uintptr_t   flags, cr0reg, cr2reg, cr4reg, new_cr4;
   uintptr_t   drReg;
#ifdef VM_X86_64
   uint64      kgs64, gs64, fs64;
#endif
   DTR hostGDT;
   Selector    cs, gs, fs;
   Selector    trReg;
   Selector    hostLDT;
   unsigned    lint;
   VMCrossPageS1B1 *crosspage = (VMCrossPageS1B1*)vm->crosspage[vcpuid];
   
#if defined(linux) && defined(USE_PERFCTRS_HOSTED)
   PerfCtr_SnapshotFromHost();
#endif
   /* NMIs and interrupts stay off until the matching restores at the end. */
   lint = DisableNMI(vm);
   SAVE_FLAGS(flags); 
   CLEAR_INTERRUPTS();

   vm->currentHostCpu[vcpuid] = HostIF_GetCurrentPCPU();
   
   /*
    * Save CR state (not CR3!)
    */
   GET_CR0(cr0reg);
   GET_CR2(cr2reg);
   GET_CR4(cr4reg);

   /* Ensure global pages are flushed */
   new_cr4 = cr4reg & ~CR4_PGE;
   SET_CR4(new_cr4);

   /* The crosspage records the CR4 value with PGE cleared. */
   crosspage->hostCR4 = new_cr4;

   /*
    * Save DR7 since we need to disable debug breakpoints during the
    * world switch code.  Save DR6 in order to accommodate the ICEBP
    * instruction.  All other debug registers are saved lazily by the
    * monitor prior to first use.  NOTE: Since monitor is in legacy
    * mode save all DRs for x86_64 here.
    */

   /*
    * hostDRSaved collects one bit per register saved below.  hostDRInHW
    * starts with all bits set; RESTORE_DR further down only reloads a
    * register whose bit has been cleared in the meantime (presumably by
    * the monitor when it overwrites that register -- confirm).
    */
   crosspage->hostDRSaved = 0;
   crosspage->hostDRInHW = 0xff;
   
#define SAVE_DR(n)                             \
           GET_DR##n(drReg);                   \
           crosspage->hostDR[n] = drReg;       \
           crosspage->hostDRSaved |= (1 << n);

   if (vm_x86_64) {
      SAVE_DR(0);
      SAVE_DR(1);
      SAVE_DR(2);
      SAVE_DR(3);
   }

   SAVE_DR(6);
   SAVE_DR(7);

#undef SAVE_DR

   /*
    * Try to disable debug exceptions during the switch.
    * Unfortunately we cannot do this reliably, as the host
    * may have set DR7_GD.  This will cause the SET_DR7 to
    * trap, and the host trap handler can then put whatever
    * it wants in DR7 and resume after the SET_DR7.  We fix
    * this in the monitor, with our own trap handler.
    */

   if (UNLIKELY(crosspage->hostDR[7] & DR7_ENABLED)) {
      SET_DR7(DR7_DEFAULT);
   }

   GET_GDT(hostGDT);
   GET_LDT(hostLDT); 

   /*
    * GS and FS are saved outside of the SwitchToMonitor() code to 1)
    * minimize the amount of code handled there and 2) prevent us from
    * faulting if they happen to be in the LDT (since the LDT is saved and
    * restored here too) and 3) make sure that if we do fault (because the
    * uncached descriptor for GS or FS's Selector has become invalid) we
    * fault in the host's context rather than the monitor or, worse,
    * never-never land. --Jeremy.
    */
#ifdef VM_X86_64
   kgs64 = GET_KernelGS64();
   gs64 = GET_GS64();
   fs64 = GET_FS64();
#endif
   cs = GET_CS();
   gs = GET_GS();
   fs = GET_FS();
   GET_TR(trReg);
   
   /* Save the %cs and %tr. */
   if (vm_x86_64) {
      crosspage->hostContext64.tr = trReg;
      crosspage->hostContext64.context.cs = cs;
   } else {
      crosspage->hostContext.tr = trReg;
      crosspage->hostContext.task.cs = cs;
   }

   SetupTemporaryGDT(vm->currentHostCpu[vcpuid], crosspage, hostGDT, cs, trReg);

   if (trReg) {
      /* To return to the task, mark it as unused. */
      Descriptor *desc;
      DTR      *hostContextGDT = GetHostContextGDT(crosspage);
      desc = (Descriptor *)(HOST_KERNEL_LA_2_VA(hostContextGDT->offset) + trReg);
      if (Desc_Type(desc) == TASK_DESC_BUSY) {
         Desc_SetType(desc, TASK_DESC);
      }
   }

   SwitchToMonitor_TOT(crosspage); /* See comment for SwitchToMonitor() */

   /* Back from the monitor: put the host state back, starting with the CRs. */
   SET_CR0(cr0reg);
   SET_CR2(cr2reg);
   SET_CR4(cr4reg);
   /* The monitor shouldn't modify CR8 */
   
   if (USE_TEMPORARY_GDT) {
      /* When enabled, vmmon is reentered on the temporary GDT. */
      SET_GDT(hostGDT);
   }
   SET_LDT(hostLDT);

   /* restore fs/gs must come before 64 bit fs/gs restore */
   SET_FS(fs);
   SET_GS(gs);
#ifdef VM_X86_64
   SET_FS64(fs64);
   SET_GS64(gs64);
   SET_KernelGS64(kgs64);
#endif

#ifdef VM_X86_64
#define CAST_HOST_DR(x) (x)
#else
#define CAST_HOST_DR(x) ((uint32) x)
#endif

#define RESTORE_DR(n) if ((crosspage->hostDRInHW & (1 << n)) == 0) {     \
                          drReg = CAST_HOST_DR(crosspage->hostDR[n]);    \
                          SET_DR##n(drReg);                              \
                      }

   /* Reload only the debug registers no longer marked as live in hardware. */
   RESTORE_DR(0);
   RESTORE_DR(1);
   RESTORE_DR(2);
   RESTORE_DR(3);
   RESTORE_DR(6);
   RESTORE_DR(7);

   /* On request from the crosspage, reload the saved DR7 with DR7_GD set. */
   if (UNLIKELY(crosspage->restoreGeneralDetect)) {
      crosspage->restoreGeneralDetect = 0;
      drReg = CAST_HOST_DR(crosspage->hostDR[7]) | DR7_GD;
      SET_DR7(drReg);
   }

#undef RESTORE_DR
#undef CAST_HOST_DR

   ASSERT_NO_INTERRUPTS();

   if (crosspage->isMCIntr()) {
      /*
       * Note we must do the RAISE_INTERRUPT before ever enabling
       * interrupts or bad things have happened (might want to know exactly
       * what bad things btw).
       * Note2: RAISE_INTERRUPT() only takes a constant, hence the switch
       * statement.
       */

      /*
       * Each IRQ_INT expands to one "case" label raising that vector;
       * IRQ_INT<N> expands to N consecutive cases starting at _x.
       */
#define IRQ_INT(_x) case _x: RAISE_INTERRUPT(_x); break 
#define IRQ_INT2(_x) IRQ_INT(_x); IRQ_INT(_x + 1)
#define IRQ_INT4(_x) IRQ_INT2(_x); IRQ_INT2(_x + 2)
#define IRQ_INT8(_x) IRQ_INT4(_x); IRQ_INT4(_x + 4)
#define IRQ_INT16(_x) IRQ_INT8(_x); IRQ_INT8(_x + 8)
#define IRQ_INT32(_x) IRQ_INT16(_x); IRQ_INT16(_x + 16)

      /* crosspage->args[0] is the vector to raise. */
      switch (crosspage->args[0]) {
	 // These are the general IO interrupts
	 // It would be nice to generate this dynamically, but see Note2 above.

	 /*
	  * Pass Machine Check Exception (Interrupt 0x12) to the host.
	  * See bug #45286 for details.
	  */
	 IRQ_INT(0x12);

         /*
          * pass the reserved vectors (20-31) as well. amd64 windows
          * generates these.
          */
	 IRQ_INT8(0x14);
	 IRQ_INT4(0x1c);

	 /* Vectors 0x20 through 0xff. */
	 IRQ_INT32(0x20);
	 IRQ_INT32(0x40);
	 IRQ_INT32(0x60);
	 IRQ_INT32(0x80);
	 IRQ_INT32(0xa0);
	 IRQ_INT32(0xc0);
	 IRQ_INT32(0xe0);

      default: 
	 /*
	  * XXXX nt
	  * running on a 2 processor machine we hit this Panic with int 0xD1 0x61 ...
	  */
	 Warning("Received Unexpected Interrupt: 0x%X in Task_Switch()\n", crosspage->args[0]);
	 Panic("Received Unexpected Interrupt: 0x%X\n", crosspage->args[0]);
      }
   }
   
   vm->currentHostCpu[vcpuid] = INVALID_HOST_CPU;

   /* Undo the interrupt and NMI masking from the top of the function. */
   RESTORE_FLAGS(flags);
   RestoreNMI(vm, lint);
#if defined(linux) && defined(USE_PERFCTRS_HOSTED)
   PerfCtr_SnapshotFromMonitor();
#endif
}

/*
 *-----------------------------------------------------------------------------
 *
 * Task_Switch_V55 --
 *
 *      Switches from the host context into the monitor
 *      context, using the VMCrossPageV55 crosspage layout. Think
 *      of it as a coroutine switch that changes
 *      not only the registers, but also the address space
 *      and all the hardware state.
 *
 *      The host's control registers, debug registers, GDT/LDT and
 *      FS/GS state are saved before the switch and restored after it.
 *      Debug registers are tracked through a shadow copy
 *      (SHADOW_DR64) and only rewritten when the saved host value
 *      differs from the shadow.  If the monitor came back because of an
 *      interrupt (crosspage->isMCIntr()), that interrupt vector is
 *      raised on the host before interrupts are re-enabled.
 *
 * Results:
 *      None; the function returns void.
 *
 * Side effects:
 *      Jump to the other side. Has no direct effect on the
 *      host-visible state except that it might generate an interrupt.
 *      vm->currentHostCpu[vcpuid] holds the current physical cpu for the
 *      duration of the switch and is INVALID_HOST_CPU afterwards.
 *      Panics on an interrupt vector that is not handled below.
 *
 *-----------------------------------------------------------------------------
 */

void 
Task_Switch_V55(VMDriver *vm,  // IN
                Vcpuid vcpuid) // IN
{
   uintptr_t   flags, cr0reg, cr2reg, cr4reg, new_cr4;
   uintptr_t   drReg;
#ifdef VM_X86_64
   uint64      kgs64, gs64, fs64;
#endif
   DTR hostGDT;
   Selector    cs, gs, fs;
   Selector    trReg;
   Selector    hostLDT;
   unsigned    lint;
   VMCrossPageV55 *crosspage = (VMCrossPageV55*)vm->crosspage[vcpuid];
   
#if defined(linux) && defined(USE_PERFCTRS_HOSTED)
   PerfCtr_SnapshotFromHost();
#endif
   /* NMIs and interrupts stay off until the matching restores at the end. */
   lint = DisableNMI(vm);
   SAVE_FLAGS(flags); 
   CLEAR_INTERRUPTS();

   vm->currentHostCpu[vcpuid] = HostIF_GetCurrentPCPU();
   
   /*
    * Save CR state (not CR3!)
    */
   GET_CR0(cr0reg);
   GET_CR2(cr2reg);
   GET_CR4(cr4reg);

   /* Ensure global pages are flushed */
   new_cr4 = cr4reg & ~CR4_PGE;
   SET_CR4(new_cr4);

   /* The crosspage records the CR4 value with PGE cleared. */
   crosspage->hostCR4 = new_cr4;

   /*
    * Save DR7 since we need to disable debug breakpoints during the
    * world switch code.  Save DR6 in order to accommodate the ICEBP
    * instruction.  All other debug registers are saved lazily by the
    * monitor prior to first use.  NOTE: Since monitor is in legacy
    * mode save all DRs for x86_64 here.
    */

   /* hostDRSaved collects one bit per register saved below. */
   crosspage->hostDRSaved = 0;

   /*
    * Besides saving the host value, record it in the shadow copy so
    * that RESTORE_DR below can tell whether the register needs to be
    * rewritten.
    */
#define SAVE_DR_SHADOW(n) \
                GET_DR##n(drReg); \
                crosspage->hostDR[n] = drReg; \
                SHADOW_DR64(crosspage, n) = drReg;    \
                crosspage->hostDRSaved |= (1 << n);

   if (vm_x86_64) {
      SAVE_DR_SHADOW(0);
      SAVE_DR_SHADOW(1);
      SAVE_DR_SHADOW(2);
      SAVE_DR_SHADOW(3);
   }

   SAVE_DR_SHADOW(6);
   SAVE_DR_SHADOW(7);

#undef SAVE_DR_SHADOW

   /*
    * Try to disable debug exceptions during the switch.
    * Unfortunately we cannot do this reliably, as the host
    * may have set DR7_GD.  This will cause the SET_DR7 to
    * trap, and the host trap handler can then put whatever
    * it wants in DR7 and resume after the SET_DR7.  We fix
    * this in the monitor, with our own trap handler.
    */

   if (UNLIKELY(crosspage->hostDR[7] & DR7_ENABLED)) {
      SET_DR7(DR7_DEFAULT);
      /* Keep the shadow in step with what was just written to DR7. */
      SHADOW_DR64(crosspage, 7) = DR7_DEFAULT;
   }

   GET_GDT(hostGDT);
   GET_LDT(hostLDT); 

   /*
    * GS and FS are saved outside of the SwitchToMonitor() code to 1)
    * minimize the amount of code handled there and 2) prevent us from
    * faulting if they happen to be in the LDT (since the LDT is saved and
    * restored here too) and 3) make sure that if we do fault (because the
    * uncached descriptor for GS or FS's Selector has become invalid) we
    * fault in the host's context rather than the monitor or, worse,
    * never-never land. --Jeremy.
    */
#ifdef VM_X86_64
   kgs64 = GET_KernelGS64();
   gs64 = GET_GS64();
   fs64 = GET_FS64();
#endif
   cs = GET_CS();
   gs = GET_GS();
   fs = GET_FS();
   GET_TR(trReg);
   
   /* Save the %cs and %tr. */
   if (vm_x86_64) {
      crosspage->hostContext64.tr = trReg;
      crosspage->hostContext64.context.cs = cs;
   } else {
      crosspage->hostContext.tr = trReg;
      crosspage->hostContext.task.cs = cs;
   }

   SetupTemporaryGDT(vm->currentHostCpu[vcpuid], crosspage, hostGDT, cs, trReg);

   if (trReg) {
      /* To return to the task, mark it as unused. */
      Descriptor *desc;
      DTR      *hostContextGDT = GetHostContextGDT(crosspage);
      desc = (Descriptor *)(HOST_KERNEL_LA_2_VA(hostContextGDT->offset) + trReg);
      if (Desc_Type(desc) == TASK_DESC_BUSY) {
         Desc_SetType(desc, TASK_DESC);
      }
   }

   SwitchToMonitor_TOT(crosspage); /* See comment for SwitchToMonitor() */

   /* Back from the monitor: put the host state back, starting with the CRs. */
   SET_CR0(cr0reg);
   SET_CR2(cr2reg);
   SET_CR4(cr4reg);
   /* The monitor shouldn't modify CR8 */
   
   if (USE_TEMPORARY_GDT) {
      /* When enabled, vmmon is reentered on the temporary GDT. */
      SET_GDT(hostGDT);
   }
   SET_LDT(hostLDT);

   /* restore fs/gs must come before 64 bit fs/gs restore */
   SET_FS(fs);
   SET_GS(gs);
#ifdef VM_X86_64
   SET_FS64(fs64);
   SET_GS64(gs64);
   SET_KernelGS64(kgs64);
#endif

#ifdef VM_X86_64
#define CAST_HOST_DR(x) (x)
#else
#define CAST_HOST_DR(x) ((uint32) x)
#endif

#define RESTORE_DR(n) if ((crosspage->hostDRSaved & (1 << n)) &&                 \
                          (crosspage->hostDR[n] != SHADOW_DR64(crosspage, n))) { \
                          drReg = CAST_HOST_DR(crosspage->hostDR[n]);            \
                          SET_DR##n(drReg); \
                      }

   /* Rewrite only saved debug registers whose shadow no longer matches. */
   RESTORE_DR(0);
   RESTORE_DR(1);
   RESTORE_DR(2);
   RESTORE_DR(3);
   RESTORE_DR(6);
   RESTORE_DR(7);

   /* On request from the crosspage, reload the saved DR7 with DR7_GD set. */
   if (UNLIKELY(crosspage->restoreGeneralDetect)) {
      crosspage->restoreGeneralDetect = 0;
      drReg = CAST_HOST_DR(crosspage->hostDR[7]) | DR7_GD;
      SET_DR7(drReg);
   }

#undef RESTORE_DR
#undef CAST_HOST_DR

   ASSERT_NO_INTERRUPTS();

   if (crosspage->isMCIntr()) {
      /*
       * Note we must do the RAISE_INTERRUPT before ever enabling
       * interrupts or bad things have happened (might want to know exactly
       * what bad things btw).
       * Note2: RAISE_INTERRUPT() only takes a constant, hence the switch
       * statement.
       */

      /*
       * Each IRQ_INT expands to one "case" label raising that vector;
       * IRQ_INT<N> expands to N consecutive cases starting at _x.
       */
#define IRQ_INT(_x) case _x: RAISE_INTERRUPT(_x); break 
#define IRQ_INT2(_x) IRQ_INT(_x); IRQ_INT(_x + 1)
#define IRQ_INT4(_x) IRQ_INT2(_x); IRQ_INT2(_x + 2)
#define IRQ_INT8(_x) IRQ_INT4(_x); IRQ_INT4(_x + 4)
#define IRQ_INT16(_x) IRQ_INT8(_x); IRQ_INT8(_x + 8)
#define IRQ_INT32(_x) IRQ_INT16(_x); IRQ_INT16(_x + 16)

      /* crosspage->args[0] is the vector to raise. */
      switch (crosspage->args[0]) {
	 // These are the general IO interrupts
	 // It would be nice to generate this dynamically, but see Note2 above.

	 /*
	  * Pass Machine Check Exception (Interrupt 0x12) to the host.
	  * See bug #45286 for details.
	  */
	 IRQ_INT(0x12);

         /*
          * pass the reserved vectors (20-31) as well. amd64 windows
          * generates these.
          */
	 IRQ_INT8(0x14);
	 IRQ_INT4(0x1c);

	 /* Vectors 0x20 through 0xff. */
	 IRQ_INT32(0x20);
	 IRQ_INT32(0x40);
	 IRQ_INT32(0x60);
	 IRQ_INT32(0x80);
	 IRQ_INT32(0xa0);
	 IRQ_INT32(0xc0);
	 IRQ_INT32(0xe0);

      default: 
	 /*
	  * XXXX nt
	  * running on a 2 processor machine we hit this Panic with int 0xD1 0x61 ...
	  */
	 Warning("Received Unexpected Interrupt: 0x%X in Task_Switch()\n", crosspage->args[0]);
	 Panic("Received Unexpected Interrupt: 0x%X\n", crosspage->args[0]);
      }
   }
   
   vm->currentHostCpu[vcpuid] = INVALID_HOST_CPU;

   /* Undo the interrupt and NMI masking from the top of the function. */
   RESTORE_FLAGS(flags);
   RestoreNMI(vm, lint);
#if defined(linux) && defined(USE_PERFCTRS_HOSTED)
   PerfCtr_SnapshotFromMonitor();
#endif
}


/*
 *-----------------------------------------------------------------------------
 *
 * SyscallDisable --
 *
 *     Disable the syscall instruction by clearing EFER.SCE, if the CPU
 *     supports syscall and the bit is currently set.
 *
 *   Results:
 *     The EFER value read before any change, usable for
 *     SyscallRestore(); 0 when the CPU does not support syscall.
 *
 *   Side effect:
 *     EFER.SCE may be cleared.
 *
 *-----------------------------------------------------------------------------
 */

static INLINE uint64
SyscallDisable(void)
{
   uint64 hostEfer;

   if (!CPUID_SyscallSupported()) {
      return 0;
   }

   hostEfer = HostIF_RDMSR(MSR_EFER);
   if ((hostEfer & MSR_EFER_SCE) != 0) {
      /* Disable EFER.SCE, leaving every other bit as it was. */
      HostIF_WRMSR(MSR_EFER, hostEfer & ~MSR_EFER_SCE);
   }
   return hostEfer;
}


/*
 *-----------------------------------------------------------------------------
 *
 * SyscallRestore --
 *
 *     Re-enable the syscall instruction according to the EFER argument:
 *     when the given value has SCE set, it is written back to EFER.
 *     When SCE is clear in the argument, nothing is written.
 *
 *   Results:
 *     None.
 *
 *   Side effect:
 *     EFER may be rewritten with the given value.
 *
 *-----------------------------------------------------------------------------
 */

static INLINE void
SyscallRestore(uint64 efer)
{
   if ((efer & MSR_EFER_SCE) == 0) {
      /* syscall was not enabled in the saved value; nothing to restore. */
      return;
   }

   /* Restore EFER.SCE */
   HostIF_WRMSR(MSR_EFER, efer);
}


/*
 *-----------------------------------------------------------------------------
 *
 * SysenterReturnToMonitorV5 --
 *
 *     Save the host's sysenter MSRs if needed, load the sysenter state
 *     requested in vmmState into the hardware, and disable syscall.
 *
 *   Results:
 *     The value returned by SyscallDisable(), i.e. the EFER value to
 *     hand to SyscallRestore() later.
 *
 *   Side effect:
 *     Sysenter MSRs and EFER may be written; hostMSR may be refreshed;
 *     vmmState->hw.sysenterCS is updated to the requested CS.
 *
 *-----------------------------------------------------------------------------
 */

static INLINE_SINGLE_CALLER uint64
SysenterReturnToMonitorV5(SystemCallRegisters *hostMSR,
                          SysenterStateV5     *vmmState)
{
   uint64 hostEfer;

   /* Host values must be captured before any MSR is overwritten below. */
   SysenterSaveHostStateV5(hostMSR, vmmState);

   if (vmmState->requestedCS != 0) {
      /* Guest using sysenter hardware; update %eip, %esp.
       *
       * It is tempting to believe that one need only write %eip/%esp
       * when the guest OS changes them and the host does not use the
       * sysenter hardware.  However, this is not the case: consider a
       * UP VM which uses sysenter running on a host which does not
       * use sysenter.  In this scenario, the guest OS can set the
       * %eip & %esp msrs on host cpu0, and then get migrated to host
       * cpu1 (where the msr values would not be correct).
       */
      SystemcallSetMSR(MSR_SYSENTER_ESP,
                       hostMSR->sysenterRSP,
                       vmmState->hw.sysenterRSP);
      SystemcallSetMSR(MSR_SYSENTER_EIP,
                       hostMSR->sysenterRIP,
                       vmmState->hw.sysenterRIP);
   }

   /* CS is always brought in line with the request, even when it is 0. */
   SysenterSetMSRCS(hostMSR->sysenterCS, vmmState->requestedCS);

   /* Update the hardware state to reflect the requested CS value.
    * This can only be performed after the host sysenter state has
    * been saved (Consider a host which has %cs as 0, and the guest
    * sets %cs to some non-zero value, and then resets it back to
    * zero.  If the 'requested' value is used to compare against the
    * host value (rather than the value actually in the hardware),
    * then a mismatch will occur).
    */
   vmmState->hw.sysenterCS = vmmState->requestedCS;
   SysenterValidateVmmStateV5(vmmState);

   hostEfer = SyscallDisable();
   return hostEfer;
}


/*
 *-----------------------------------------------------------------------------
 *
 * SysenterReturnFromMonitorV5 --
 *
 *     Put the host's sysenter MSRs and syscall enable state back after
 *     the monitor has run.
 *
 *   Results:
 *     None.
 *
 *   Side effects:
 *     Sysenter MSRs and EFER may be rewritten with host values.
 *
 *-----------------------------------------------------------------------------
 */

static INLINE_SINGLE_CALLER void
SysenterReturnFromMonitorV5(SystemCallRegisters   *hostMSR,
                            const SysenterStateV5 *vmmState,
                            const uint64         efer)
{
   /*
    * ESP/EIP are restored when the host itself uses sysenter (non-zero
    * host CS) or when MSR caching is turned off.
    */
   if (hostMSR->sysenterCS != 0 || !vmmState->sysenterCacheMSR) {
      SystemcallSetMSR(MSR_SYSENTER_ESP,
                       vmmState->hw.sysenterRSP, hostMSR->sysenterRSP);
      SystemcallSetMSR(MSR_SYSENTER_EIP,
                       vmmState->hw.sysenterRIP, hostMSR->sysenterRIP);
   }

   /* CS goes through SysenterSetMSRCS() unconditionally. */
   SysenterSetMSRCS(vmmState->hw.sysenterCS, hostMSR->sysenterCS);

   SyscallRestore(efer);
}


/*
 *-----------------------------------------------------------------------------
 *
 * Task_Switch_V5 --
 *
 *      Switches from the host context into the monitor
 *      context, using the VMCrossPageV5 crosspage layout. Think of it
 *      as a coroutine switch that changes not only the registers, but
 *      also the address space and all the hardware state.
 *
 *      Sequence, as implemented below:
 *        1. Disable NMIs and interrupts; record the current host cpu.
 *        2. Save host CR0/CR2/CR4, debug registers, GDT/LDT, FS/GS,
 *           sysenter/syscall state, TR and CS.
 *        3. Call SwitchToMonitor_TOT() to run the monitor.
 *        4. Restore the saved host state.
 *        5. If crosspage->isMCIntr() is true, raise the vector found in
 *           crosspage->args[0] on the host before flags are restored.
 *
 * Results:
 *      None (the function is void).
 *      NOTE(review): the next module call is presumably communicated
 *      through the crosspage -- confirm against the caller.
 *
 * Side effects:
 *      Jump to the other side. Has no direct effect on the
 *      host-visible state except that it might generate an interrupt.
 *
 *-----------------------------------------------------------------------------
 */

void 
Task_Switch_V5(VMDriver *vm,  // IN
               Vcpuid vcpuid) // IN
{
   uintptr_t   flags, cr0reg, cr2reg, cr4reg, new_cr4;
   uintptr_t   drReg;
#ifdef VM_X86_64
   uint64      gs64, fs64;
#endif
   DTR32       gdtreg;
   Descriptor *desc;
   VA          vAddr;
   Selector    cs, gs, fs;
   uint16 trReg;   
   uint16 ldtreg;
   unsigned int lint;
   VMCrossPageV5 *crosspage = (VMCrossPageV5*)vm->crosspage[vcpuid];
   SystemCallRegisters *hostMSR;
   SysenterStateV5 *vmmSysenter;
   uint64 efer;


#if defined(linux) && defined(USE_PERFCTRS_HOSTED)
   PerfCtr_SnapshotFromHost();
#endif
   /* NMIs and interrupts stay off until the matching restores at the end. */
   lint = DisableNMI(vm);
   SAVE_FLAGS(flags); 
   CLEAR_INTERRUPTS();

   /* hostSystemCall is indexed by host cpu; pick the entry for this cpu. */
   vm->currentHostCpu[vcpuid] = HostIF_GetCurrentPCPU();
   ASSERT(hostSystemCall != NULL);
   hostMSR = hostSystemCall + vm->currentHostCpu[vcpuid];
   
   /*
    * Save CR state (not CR3!)
    */
   GET_CR0(cr0reg);
   GET_CR2(cr2reg);
   GET_CR4(cr4reg);

   /* Ensure global pages are flushed */
   new_cr4 = cr4reg & ~CR4_PGE;
   SET_CR4(new_cr4);

   /* The crosspage records the CR4 value now in effect (PGE cleared). */
   crosspage->hostCR4 = new_cr4;

   /*
    * Save DR7 since we need to disable debug breakpoints during the
    * world switch code.  Save DR6 in order to accommodate the ICEBP
    * instruction.  All other debug registers are saved lazily by the
    * monitor prior to first use.  NOTE: Since monitor is in legacy
    * mode save all DRs for x86_64 here.
    */

   /* Bit n of hostDRSaved is set once DRn has been captured below. */
   crosspage->hostDRSaved = 0;

#define SAVE_DR_SHADOW(n) \
                GET_DR##n(drReg); \
                crosspage->hostDR[n] = drReg; \
                crosspage->shadowDR[n] = (uint32) drReg; \
                crosspage->hostDRSaved |= (1 << n);

   if (vm_x86_64) {
      SAVE_DR_SHADOW(0);
      SAVE_DR_SHADOW(1);
      SAVE_DR_SHADOW(2);
      SAVE_DR_SHADOW(3);
   }

   SAVE_DR_SHADOW(6);
   SAVE_DR_SHADOW(7);
   
#undef SAVE_DR_SHADOW
   
   /*
    * Try to disable debug exceptions during the switch.
    * Unfortunately we cannot do this reliably, as the host
    * may have set DR7_GD.  This will cause the SET_DR7 to
    * trap, and the host trap handler can then put whatever
    * it wants in DR7 and resume after the SET_DR7.  We fix
    * this in the monitor, with our own trap handler.
    */

   if (UNLIKELY(crosspage->hostDR[7] & DR7_ENABLED)) {
      SET_DR7(DR7_DEFAULT);
      /* Keep the shadow in step so the restore path sees the difference. */
      crosspage->shadowDR[7] = DR7_DEFAULT;
   }

#ifdef VM_X86_64
   GET_GDT(crosspage->hostContext64.gdtr.dtr);
#else
   GET_GDT(gdtreg);
#endif
   GET_LDT(ldtreg); 

   /*
    * GS and FS are saved outside of the SwitchToMonitor() code to 1)
    * minimize the amount of code handled there and 2) prevent us from
    * faulting if they happen to be in the LDT (since the LDT is saved and
    * restored here too) and 3) make sure that if we do fault (because the
    * uncached descriptor for GS or FS's Selector has become invalid) we
    * fault in the host's context rather than the monitor or, worse,
    * never-never land. --Jeremy.
    */
#ifdef VM_X86_64
   gs64 = GET_GS64();
   fs64 = GET_FS64();
#endif
   gs = GET_GS();
   fs = GET_FS();
   
   /* The crosspage keeps separate sysenter state for 64-bit and 32-bit vmm. */
   vmmSysenter = crosspage->runVmm64 ? &crosspage->vmm64Sysenter 
                                     : &crosspage->vmm32Sysenter;
   efer = SysenterReturnToMonitorV5(hostMSR, vmmSysenter);

   /*
    * Mark our task descriptor as unused, or we won't be
    * able to come back to it (SET_TR set the busy bit)
    */
   
   GET_TR(trReg);
   if (vm_x86_64) {
      crosspage->hostContext64.tr = trReg;
   } else {
      crosspage->hostContext.tr = trReg;
   }

   if (trReg) {
      /* Locate the TSS descriptor: GDT base (as a host VA) plus selector. */
      if (vm_x86_64) {
         vAddr = HOST_KERNEL_LA_2_VA(crosspage->hostContext64.gdtr.dtr.offset) + trReg;
      } else {
         vAddr = HOST_KERNEL_LA_2_VA(gdtreg.offset) + trReg;
      }
      desc = (Descriptor *)(vAddr);
      if (Desc_Type(desc) == TASK_DESC_BUSY) {
         Desc_SetType(desc, TASK_DESC);
      }
   }

   /*
    * Save the CS on this side
    */
   cs = GET_CS();
   if (vm_x86_64) {
      crosspage->hostContext64.context.cs = cs;
   } else {
      crosspage->hostContext.task.cs = cs;
   }
   
   /* See comment for SwitchToMonitor() */
   SwitchToMonitor_TOT(crosspage);

   /* Back from the monitor: restore host state saved above. */
   SysenterReturnFromMonitorV5(hostMSR, vmmSysenter, efer);

   SET_CR0(cr0reg);
   SET_CR2(cr2reg);
   SET_CR4(cr4reg);
   /* The monitor shouldn't modify CR8 */
   
   SET_LDT(ldtreg);

   /* restore fs/gs must come before 64 bit fs/gs restore */
   SET_FS(fs);
   SET_GS(gs);
#ifdef VM_X86_64
   SET_FS64(fs64);
   SET_GS64(gs64);
#endif

#ifdef VM_X86_64
#define CAST_HOST_DR(x) (x)
#else
#define CAST_HOST_DR(x) ((uint32) x)
#endif

   /* A DR is rewritten only if it was saved and its shadow now differs. */
#define RESTORE_DR(n) if ((crosspage->hostDRSaved & (1 << n)) && \
                          (crosspage->hostDR[n] != crosspage->shadowDR[n])) { \
                          drReg = CAST_HOST_DR(crosspage->hostDR[n]); \
                          SET_DR##n(drReg); \
                      }

   RESTORE_DR(0);
   RESTORE_DR(1);
   RESTORE_DR(2);
   RESTORE_DR(3);
   RESTORE_DR(6);
   RESTORE_DR(7);

   /* On request, rewrite DR7 with the saved host value plus DR7_GD. */
   if (UNLIKELY(crosspage->restoreGeneralDetect)) {
      crosspage->restoreGeneralDetect = 0;
      drReg = CAST_HOST_DR(crosspage->hostDR[7]) | DR7_GD;
      SET_DR7(drReg);
   }

#undef RESTORE_DR
#undef CAST_HOST_DR

   ASSERT_NO_INTERRUPTS();

   if (crosspage->isMCIntr()) {
      /*
       * Note we must do the RAISE_INTERRUPT before ever enabling
       * interrupts or bad things have happened (might want to know exactly
       * what bad things btw).
       * Note2: RAISE_INTERRUPT() only takes a constant, hence the switch
       * statement.
       */

      /* Each IRQ_INTn(_x) expands to n consecutive cases starting at _x. */
#define IRQ_INT(_x) case _x: RAISE_INTERRUPT(_x); break 
#define IRQ_INT2(_x) IRQ_INT(_x); IRQ_INT(_x + 1)
#define IRQ_INT4(_x) IRQ_INT2(_x); IRQ_INT2(_x + 2)
#define IRQ_INT8(_x) IRQ_INT4(_x); IRQ_INT4(_x + 4)
#define IRQ_INT16(_x) IRQ_INT8(_x); IRQ_INT8(_x + 8)
#define IRQ_INT32(_x) IRQ_INT16(_x); IRQ_INT16(_x + 16)

      switch (crosspage->args[0]) {
	 // These are the general IO interrupts
	 // It would be nice to generate this dynamically, but see Note2 above.

	 /*
	  * Pass Machine Check Exception (Interrupt 0x12) to the host.
	  * See bug #45286 for details.
	  */
	 IRQ_INT(0x12);

         /*
          * pass the reserved vectors (20-31) as well. amd64 windows
          * generates these.
          */
	 IRQ_INT8(0x14);
	 IRQ_INT4(0x1c);

	 IRQ_INT32(0x20);
	 IRQ_INT32(0x40);
	 IRQ_INT32(0x60);
	 IRQ_INT32(0x80);
	 IRQ_INT32(0xa0);
	 IRQ_INT32(0xc0);
	 IRQ_INT32(0xe0);

      default: 
	 /*
	  * XXXX nt
	  * running on a 2 processor machine we hit this Panic with int 0xD1 0x61 ...
	  */
	 Warning("Received Unexpected Interrupt: 0x%X in Task_Switch()\n", crosspage->args[0]);
	 Panic("Received Unexpected Interrupt: 0x%X\n", crosspage->args[0]);
      }
   }
   
   /* This vcpu is no longer bound to a host cpu. */
   vm->currentHostCpu[vcpuid] = INVALID_HOST_CPU;

   RESTORE_FLAGS(flags);
   RestoreNMI(vm, lint);
#if defined(linux) && defined(USE_PERFCTRS_HOSTED)
   PerfCtr_SnapshotFromMonitor();
#endif
}


/*
 *-----------------------------------------------------------------------------
 *
 * SysenterReturnToMonitorOld  --
 *
 *     Record the host OS' sysenter state in hostState before control
 *     goes to the monitor, then disable syscall.
 *
 *     The monitor cannot tell whether the host uses the sysenter MSRs.
 *     To spare it needless writes to MSR_SYSENTER_CS, this routine
 *     zeroes that MSR itself when the host has a non-zero value in it
 *     but the monitor does not use sysenter.
 *
 *     hostState is written only when hostMayUseSysenter is set.
 *
 *   Results:
 *     The value returned by SyscallDisable().
 *
 *-----------------------------------------------------------------------------
 */

static INLINE_SINGLE_CALLER uint64
SysenterReturnToMonitorOld(SysenterStateV45 *hostState,  // OUT
                           int monUsesSysenter)          // IN
{
   if (hostMayUseSysenter) {
      hostState->validEIP = 0;
      hostState->cs = (Selector)HostIF_RDMSR(MSR_SYSENTER_CS);

      if (hostState->cs != 0) {
         if (!monUsesSysenter) {
            /*
             * Host uses sysenter, monitor does not: with
             * MSR_SYSENTER_CS cleared, a sysenter executed in the
             * vmm/guest raises #GP.
             */
            HostIF_WRMSR(MSR_SYSENTER_CS, 0);
         } else {
            /* Both sides use sysenter: keep the host's ESP/EIP too. */
            hostState->rsp = HostIF_RDMSR(MSR_SYSENTER_ESP);
            hostState->rip = HostIF_RDMSR(MSR_SYSENTER_EIP);
            hostState->validEIP = 1;
         }
      }
   }

   return SyscallDisable();
}


/*
 *-----------------------------------------------------------------------------
 *
 * SysenterReturnFromMonitorOld  --
 *
 *      Write the host sysenter state captured by
 *      SysenterReturnToMonitorOld() back into the MSRs, then restore
 *      the syscall enable bit from efer.
 *
 *      MSR_SYSENTER_CS is rewritten when the monitor used sysenter or
 *      the host value was non-zero; ESP/EIP are rewritten only when
 *      they were saved (validEIP).
 *
 *-----------------------------------------------------------------------------
 */

static INLINE_SINGLE_CALLER void
SysenterReturnFromMonitorOld(const SysenterStateV45 *hostState,  // IN
                             int monUsesSysenter,                // IN
                             uint64 efer)                        // IN
{
   if (hostMayUseSysenter) {
      const int csNeedsRestore = monUsesSysenter || hostState->cs != 0;

      if (csNeedsRestore) {
         HostIF_WRMSR(MSR_SYSENTER_CS, hostState->cs);
      }
      if (hostState->validEIP) {
         HostIF_WRMSR(MSR_SYSENTER_ESP, hostState->rsp);
         HostIF_WRMSR(MSR_SYSENTER_EIP, hostState->rip);
      }
   }

   SyscallRestore(efer);
}

#ifdef VM_X86_64
/*
 * From an email with Petr regarding gcc's handling of the stdcall
 * attribute for x86-64:
 *
 *    As far as I can tell, for x86_64 there is only one calling
 *    convention:
 *       On Linux rdi/rsi/rdx/rcx/r8d/r9d for <= 6 arguments,
 *       others always on stack, caller always adjusts stack.
 *
 *       On Windows it is rcx/rdx/r8d/r9d for <= 4 arguments, rest on
 *       stack.  When more than 4 arguments are passed, spill space is
 *       reserved on the stack for the register arguments.  Argument
 *       5 is accessed at (5 * 8)(rsp).
 */

/*
 *-----------------------------------------------------------------------------
 *
 * SwitchToMonitor32_V45 --
 *
 *      Wrapper that calls code to switch from the host to the monitor
 *      (x86-64 host build, VMCrossPageV45 layout).
 *
 *      The switch code at crosspage->contextSwitch.hostXToVmm32 is
 *      entered with:
 *         rax = address of crosspage->hostContext64
 *         rbx = crosspageLinearAddr + offset of monContext (monContextPA)
 *         rcx = address of monContext computed from crosspageStart()
 *               (monContextVA)
 *         rdx = crosspage->hostSwitchCR3
 *
 *      ws451 selects the call sequence: non-zero calls the switch code
 *      directly; zero first pushes the saved host CS (widened to 64
 *      bits) and then calls.
 *      NOTE(review): the pushed CS is not popped here; presumably the
 *      switch code consumes it -- confirm against the crosspage code.
 *
 * Side effects:
 *      None for the module
 *
 *-----------------------------------------------------------------------------
 */
static INLINE_SINGLE_CALLER void
SwitchToMonitor32_V45(VMCrossPageV45 *crosspage, int ws451)
{
   VA32 monContextVA = VPN_2_VA(crosspage->crosspageStart()) + 
   		       offsetof(VMCrossPageV45, monContext);
   PA monContextPA   = crosspage->crosspageLinearAddr + 
                       offsetof(VMCrossPageV45, monContext);
   if (ws451) {
   /* Direct indirect call; operand 4 is the switch code pointer. */
   __asm__ __volatile__(
      "call  *%4"     "\n\t"
      :
      : "a" (&crosspage->hostContext64),
        "b" (monContextPA),
        "c" (monContextVA),
        "d" (crosspage->hostSwitchCR3),
        "g" (crosspage->contextSwitch.hostXToVmm32)
      : "memory", "cc"
   );
   } else {
   /* Push the host CS (operand 4), then call the switch code (operand 5). */
   __asm__ __volatile__(
      "pushq  %4"     "\n\t"
      "call  *%5"     "\n\t"
      :
      : "a" (&crosspage->hostContext64),
        "b" (monContextPA),
        "c" (monContextVA),
        "d" (crosspage->hostSwitchCR3),
        "g" ((uint64)crosspage->hostContext64.context.cs),
        "r" (crosspage->contextSwitch.hostXToVmm32)
      : "memory", "cc"
   );
   }
}

#else

/*
 *-----------------------------------------------------------------------------
 *
 * SwitchToMonitor32 --
 *
 *      Low-level host-to-monitor switch for 32-bit host builds.  Pushes
 *      newDstVA, dst, src and a zero word (in that order), then makes an
 *      indirect call through codePtr.  After the call returns, two words
 *      are popped into scratch registers (ebx, eax) and 20 bytes are
 *      added to %esp.
 *
 *      Implemented with gcc extended asm or MSVC inline asm; any other
 *      compiler is rejected at compile time.
 *
 *      NOTE(review): the 20-byte adjustment is described below as
 *      "retval + segment + 3 arguments pushed"; the exact stack layout
 *      depends on the switch code behind codePtr -- confirm there.
 *
 * Side effects:
 *      Runs the code at codePtr.
 *
 *-----------------------------------------------------------------------------
 */
static INLINE_SINGLE_CALLER void
SwitchToMonitor32(unsigned char *codePtr,
                  ContextInfoV45 *src,
                  ContextInfoV45 *dst,
                  uint32 newDstVA)
{
#ifdef __GNUC__
   /* Receive the two popped words; their values are not used. */
   uint32 dummy1;
   uint32 dummy2;

   /* Checked against the Intel manual and GCC --hpreg */
   __asm__ __volatile__(
      "pushl %2"       "\n\t"
      "pushl %3"       "\n\t"
      "pushl %4"       "\n\t"
      "pushl %5"       "\n\t"
      "call *%6"       "\n\t"
      "popl %0"        "\n\t"
      "popl %1"        "\n\t"
      "addl %7, %%esp"
      /* 
       * Unfortunately there is no way to tell gcc that we are 
       * using 'pushl' and so %esp-relative addressing does not
       * work. To prevent miscompilation we must limit ourselves
       * to registers and immediate values only.
       *
       * We do not have to use early-clobber, as %0 & %1 are used
       * only before %7, and %7 is an immediate value.
       */
      : "=b" (dummy1),
        "=a" (dummy2)
      : "g" (newDstVA),
        "ri" (dst),
        "ri" (src),
        "ri" (0),
        "r" (codePtr),
        "i" ((1 + 1 + 3) * 4) // retval + segment + 3 arguments pushed
      : "memory", "cc"
   );
#elif _MSC_VER
   __asm push newDstVA
   __asm push dst
   __asm push src
   __asm push 0
   __asm call codePtr
   __asm pop ebx
   __asm pop eax
   __asm add esp, 20
#else
#   error No compiler defined for SwitchToMonitor
#endif
}

/*
 * SwitchToMonitor32_V3 --
 *
 *      Gather the SwitchToMonitor32() arguments from a crosspage of type
 *      VMCrossPage: the switch code pointer, the host and monitor
 *      contexts, and the address of monContext computed from
 *      crosspageStart().
 */
template <class VMCrossPage>
static TEMPLATE_INLINE_SINGLE_CALLER void
SwitchToMonitor32_V3(VMCrossPage *crosspage)
{
   const uint32 monContextVA = VPN_2_VA(crosspage->crosspageStart()) +
                               offsetof(VMCrossPage, monContext);

   SwitchToMonitor32(crosspage->contextSwitch.hostXToVmm32,
                     &crosspage->hostContext,
                     &crosspage->monContext,
                     monContextVA);
}

/*
 * SwitchToMonitor32_V45 --
 *
 *      32-bit host variant for the VMCrossPageV45 layout: forwards the
 *      switch code pointer, host/monitor contexts and the monContext
 *      address to SwitchToMonitor32().  The second parameter is unused
 *      here; it exists so the signature matches the x86-64 variant.
 */
static INLINE_SINGLE_CALLER void
SwitchToMonitor32_V45(VMCrossPageV45 *crosspage, int dummy)
{
   const uint32 monContextVA = VPN_2_VA(crosspage->crosspageStart()) +
                               offsetof(VMCrossPageV45, monContext);

   SwitchToMonitor32(crosspage->contextSwitch.hostXToVmm32,
                     &crosspage->hostContext,
                     &crosspage->monContext,
                     monContextVA);
}
#endif // VM_X86_64


#ifndef VM_X86_64
/*
 * SwitchToMonitor_V3 --
 *
 *      Entry point used by the templated Task_Switch(); simply forwards
 *      to the 32-bit switch helper for the same crosspage type.
 */
template <class VMCrossPage>
static TEMPLATE_INLINE_SINGLE_CALLER void
SwitchToMonitor_V3(VMCrossPage *crosspage)
{
   SwitchToMonitor32_V3<VMCrossPage>(crosspage);
}
#endif // VM_X86_64


/*
 * SwitchToMonitor_V45 --
 *
 *      Entry point used by Task_Switch_V45(); forwards both arguments
 *      unchanged to SwitchToMonitor32_V45(), whichever build variant
 *      (x86-64 or 32-bit) of that function was compiled in.
 */
static INLINE_SINGLE_CALLER void
SwitchToMonitor_V45(VMCrossPageV45 *crosspage, int ws451)
{
   SwitchToMonitor32_V45(crosspage, ws451);
}


/*
 *-----------------------------------------------------------------------------
 *
 * Task_Switch_V45 --
 *
 *      Switches from the host context into the monitor
 *      context, using the VMCrossPageV45 crosspage layout. Think of it
 *      as a coroutine switch that changes not only the registers, but
 *      also the address space and all the hardware state.
 *
 *      Sequence, as implemented below:
 *        1. Disable NMIs and interrupts; record the current host cpu.
 *        2. Save host CR0/CR2/CR4, debug registers, GDT/LDT, FS/GS,
 *           sysenter/syscall state, TR and CS.
 *        3. Call SwitchToMonitor_V45() to run the monitor.
 *        4. Restore the saved host state.
 *        5. If crosspage->isMCIntr() is true, raise the vector found in
 *           crosspage->args[0] on the host before flags are restored.
 *
 * Results:
 *      None (the function is void).
 *      NOTE(review): the next module call is presumably communicated
 *      through the crosspage -- confirm against the caller.
 *
 * Side effects:
 *      Jump to the other side. Has no direct effect on the
 *      host-visible state except that it might generate an interrupt.
 *
 *-----------------------------------------------------------------------------
 */

void 
Task_Switch_V45(VMDriver *vm,  // IN
                Vcpuid vcpuid) // IN
{
   uintptr_t   flags, cr0reg, cr2reg, cr4reg, new_cr4;
   uintptr_t   drReg;
#ifdef VM_X86_64
   uint64      gs64, fs64;
#endif
   DTR32       gdtreg;
   Selector    cs, gs, fs;
   Descriptor *desc;
   SysenterStateV45 sysenterState;
   VA          vAddr;
   uint16 trReg;   
   uint16 ldtreg;
   unsigned int lint;
   VMCrossPageV45 *crosspage = (VMCrossPageV45*)vm->crosspage[vcpuid];
   uint64 efer;

#if defined(linux) && defined(USE_PERFCTRS_HOSTED)
   PerfCtr_SnapshotFromHost();
#endif
   /* NMIs and interrupts stay off until the matching restores at the end. */
   lint = DisableNMI(vm);
   SAVE_FLAGS(flags); 
   CLEAR_INTERRUPTS();

   vm->currentHostCpu[vcpuid] = HostIF_GetCurrentPCPU();
   
   /*
    * Save CR state (not CR3!)
    */
   GET_CR0(cr0reg);
   GET_CR2(cr2reg);
   GET_CR4(cr4reg);
   
   /* Ensure global pages are flushed */
   new_cr4 = cr4reg & ~CR4_PGE;
   SET_CR4(new_cr4);

   /* The crosspage records the CR4 value now in effect (PGE cleared). */
   crosspage->switchHostCR4 = new_cr4;
   
   /*
    * Save DR7 since we need to disable debug breakpoints during the
    * world switch code.  Save DR6 in order to accommodate the ICEBP
    * instruction.  All other debug registers are saved lazily by the
    * monitor prior to first use.  NOTE: Since monitor is in legacy
    * mode save all DRs for x86_64 here.
    */

   /* Bit n of hostDRSaved is set once DRn has been captured below. */
   crosspage->hostDRSaved = 0;

#define SAVE_DR_SHADOW(n) \
                GET_DR##n(drReg); \
                crosspage->hostDR[n] = drReg; \
                crosspage->shadowDR[n] = (uint32) drReg; \
                crosspage->hostDRSaved |= (1 << n);

#ifdef VM_X86_64
   SAVE_DR_SHADOW(0);
   SAVE_DR_SHADOW(1);
   SAVE_DR_SHADOW(2);
   SAVE_DR_SHADOW(3);
#endif // VM_X86_64

   SAVE_DR_SHADOW(6);
   SAVE_DR_SHADOW(7);
   
#undef SAVE_DR_SHADOW
   
   /*
    * Disable debug exceptions during the switch.  Note: If
    * the DR7 GD bit is on, a trap occurred at the GET_DR
    * above, and this process is most likely dead.
    */

   if (UNLIKELY(crosspage->hostDR[7] & DR7_ENABLED)) {
      SET_DR7(DR7_DEFAULT);
      /* Keep the shadow in step so the restore path sees the difference. */
      crosspage->shadowDR[7] = DR7_DEFAULT;
   }

#ifdef VM_X86_64
   GET_GDT(crosspage->hostContext64.gdtr64.dtr);
#else
   GET_GDT(gdtreg);
#endif
   GET_LDT(ldtreg); 

   /*
    * GS and FS are saved outside of the SwitchToMonitor() code to 1)
    * minimize the amount of code handled there and 2) prevent us from
    * faulting if they happen to be in the LDT (since the LDT is saved and
    * restored here too) and 3) make sure that if we do fault (because the
    * uncached descriptor for GS or FS's Selector has become invalid) we
    * fault in the host's context rather than the monitor or, worse,
    * never-never land. --Jeremy.
    */
#ifdef VM_X86_64
   gs64 = GET_GS64();
   fs64 = GET_FS64();
#endif
   gs = GET_GS();
   fs = GET_FS();
   
   /* Host sysenter state lives on this stack frame for the duration. */
   efer = SysenterReturnToMonitorOld(&sysenterState, crosspage->monUsesSysenter);

   /*
    * Mark our task descriptor as unused, or we won't be
    * able to come back to it (SET_TR set the busy bit)
    */
   
   GET_TR(trReg);
   if (vm_x86_64) {
      crosspage->hostContext64.tr = trReg;
   } else {
      crosspage->hostContext.tr = trReg;
   }

   if (trReg) {
      /* Locate the TSS descriptor: GDT base (as a host VA) plus selector. */
      if (vm_x86_64) {
         vAddr = HOST_KERNEL_LA_2_VA(crosspage->hostContext64.gdtr64.dtr.offset) + trReg;
      } else {
         vAddr = HOST_KERNEL_LA_2_VA(gdtreg.offset) + trReg;
      }
      desc = (Descriptor *)(vAddr);
      if (Desc_Type(desc) == TASK_DESC_BUSY) {
         Desc_SetType(desc, TASK_DESC);
      }
   }

   /*
    * Save the CS on this side
    */
   cs = GET_CS();
   if (vm_x86_64) {
      crosspage->hostContext64.context.cs = cs;
   } else {
      /* CS is stored twice: at byte offset 4 of jump[] and in task.cs. */
      *(uint16*) (&crosspage->hostContext.jump[4]) = cs;
      crosspage->hostContext.task.cs = cs;
   }

   /* See comment for SwitchToMonitor() */
   SwitchToMonitor_V45(crosspage, vm->vmVersion == VME_V45);

   /* Back from the monitor: restore host state saved above. */
   SysenterReturnFromMonitorOld(&sysenterState, crosspage->monUsesSysenter, efer);
   SET_CR0(cr0reg);
   SET_CR2(cr2reg);
   SET_CR4(cr4reg);
   
   SET_LDT(ldtreg);
   
   SET_FS(fs);
   SET_GS(gs);
#ifdef VM_X86_64
   SET_FS64(fs64);
   SET_GS64(gs64);
#endif

#ifdef VM_X86_64
#define CAST_HOST_DR(x) (x)
#else
#define CAST_HOST_DR(x) ((uint32) x)
#endif

   /* A DR is rewritten only if it was saved and its shadow now differs. */
#define RESTORE_DR(n) if ((crosspage->hostDRSaved & (1 << n)) && \
                          (crosspage->hostDR[n] != crosspage->shadowDR[n])) { \
                          drReg = CAST_HOST_DR(crosspage->hostDR[n]); \
                          SET_DR##n(drReg); \
                      }

   RESTORE_DR(0);
   RESTORE_DR(1);
   RESTORE_DR(2);
   RESTORE_DR(3);
   RESTORE_DR(6);
   RESTORE_DR(7);

#undef RESTORE_DR
#undef CAST_HOST_DR
   
   ASSERT_NO_INTERRUPTS();

   if (crosspage->isMCIntr()) {
      /*
       * Note we must do the RAISE_INTERRUPT before ever enabling
       * interrupts or bad things have happened (might want to know exactly
       * what bad things btw).
       * Note2: RAISE_INTERRUPT() only takes a constant, hence the switch
       * statement.
       */

      /* Each IRQ_INTn(_x) expands to n consecutive cases starting at _x. */
#define IRQ_INT(_x) case _x: RAISE_INTERRUPT(_x); break 
#define IRQ_INT2(_x) IRQ_INT(_x); IRQ_INT(_x + 1)
#define IRQ_INT4(_x) IRQ_INT2(_x); IRQ_INT2(_x + 2)
#define IRQ_INT8(_x) IRQ_INT4(_x); IRQ_INT4(_x + 4)
#define IRQ_INT16(_x) IRQ_INT8(_x); IRQ_INT8(_x + 8)
#define IRQ_INT32(_x) IRQ_INT16(_x); IRQ_INT16(_x + 16)

      switch (crosspage->args[0]) {
	 // These are the general IO interrupts
	 // It would be nice to generate this dynamically, but see Note2 above.
	 // x86-64 builds forward all of vectors 0x00-0x1f; 32-bit builds
	 // forward only 0x12 from that range.
#ifdef VM_X86_64
	 IRQ_INT32(0x00);
#else
	 IRQ_INT(0x12);
#endif
	 IRQ_INT32(0x20);
	 IRQ_INT32(0x40);
	 IRQ_INT32(0x60);
	 IRQ_INT32(0x80);
	 IRQ_INT32(0xa0);
	 IRQ_INT32(0xc0);
	 IRQ_INT32(0xe0);

      default: 
	 /*
	  * XXXX nt
	  * running on a 2 processor machine we hit this Panic with int 0xD1 0x61 ...
	  */
	 Warning("Received Unexpected Interrupt: 0x%X in Task_Switch()\n", crosspage->args[0]);
	 Panic("Received Unexpected Interrupt: 0x%X\n", crosspage->args[0]);
      }
   }
   
   /* This vcpu is no longer bound to a host cpu. */
   vm->currentHostCpu[vcpuid] = INVALID_HOST_CPU;

   RESTORE_FLAGS(flags);
   RestoreNMI(vm, lint);
#if defined(linux) && defined(USE_PERFCTRS_HOSTED)
   PerfCtr_SnapshotFromMonitor();
#endif
}


#ifndef VM_X86_64
/*
 *------------------------------------------------------------------------------
 *
 * Task_InitCrosspage (template) --
 *
 *    Set up, for every VCPU listed in initParams, the crosspage used to
 *    switch to the monitor task: resolve the user-supplied crosspage
 *    pointer to a driver pointer, publish it in vm->crosspage[], and
 *    seed it with the host's CR3, IDT and TR plus its own addresses and
 *    the host interrupt relocation bases.
 *
 * Results:
 *    0 on success
 *    1 if a crosspage pointer cannot be resolved (crosspages of earlier
 *      VCPUs stay initialized)
 *
 * Side effects:
 *    vm->crosspage[] and the crosspages themselves are written.
 *  
 *------------------------------------------------------------------------------
 */

template <class VMCrossPage> static int 
Task_InitCrosspage(VMDriver *vm,          // IN
                   InitBlock *initParams) // IN: Initial params from the VM 
{
   const LA hostLinearBase = HOST_KERNEL_VA_2_LA(0);
   Vcpuid cpu;

   ASSERT(sizeof(VMCrossPage) < PAGE_SIZE);
   ASSERT(MODULECALL_CROSS_PAGE_LEN==1);

   for (cpu = 0; cpu < initParams->numVCPUs; cpu++) {
      uint32 hostCR3;
      uint16 hostTR;

      /*
       * The physical memory backing the crosspage was already locked
       * in MonitorAllocateMemory() --hpreg
       */
      VMCrossPage *xp = (VMCrossPage*)HostIF_UserToDriverPtr(vm, initParams->crosspage[cpu]);

      if (xp == NULL) {
         return 1;
      }
      vm->crosspage[cpu] = xp;

      GET_CR3(hostCR3);
      xp->hostContext.task.cr3 = hostCR3;

      /* The crosspage's own address in its various forms. */
      xp->hostVA = (VA32)xp;
      xp->hostContextVA = (uintptr_t)&xp->hostContext;
      xp->crosspageLinearAddr = hostLinearBase + (uintptr_t)xp;

      DTR32 hostIDTR;
      GET_IDT(hostIDTR);
      xp->monContext.idtr.dtr = hostIDTR; /* irrelevant */

      GET_TR(hostTR);
      xp->hostContext.tr = hostTR;

      xp->irqRelocateOffset[0] = IRQ_HOST_INTR1_BASE;
      xp->irqRelocateOffset[1] = IRQ_HOST_INTR2_BASE;

#if defined(WINNT_DDK) && defined(VMX86_DEBUG)
      xp->vmPtr = vm;
#endif
   }

   return 0;
}
#endif

/*
 * Task_InitCrosspage --
 *
 *    Pick the crosspage initializer that matches vm->vmVersion and run
 *    it.  The pre-V45 layouts are only available on builds without
 *    VM_X86_64.
 *
 * Results:
 *    Whatever the selected initializer returns, or 0xDEAD when the
 *    version is not recognized.
 */
int 
Task_InitCrosspage(VMDriver *vm,
                   InitBlock *initParams) // Initial params from the VM
{
   int status = 0xDEAD;

   switch (vm->vmVersion) {
   case VME_TOT:
      status = Task_InitCrosspage_TOT<VMCrossPageTOT>(vm, initParams);
      break;

   /* S1B1 and V55 share an initializer but differ in crosspage layout. */
   case VME_S1B1:
      status = Task_InitCrosspage_V55<VMCrossPageS1B1>(vm, initParams);
      break;
   case VME_V55:
      status = Task_InitCrosspage_V55<VMCrossPageV55>(vm, initParams);
      break;

   case VME_V5:
      status = Task_InitCrosspage_V5<VMCrossPageV5>(vm, initParams);
      break;

   /* The V45 family differs only in the last argument (1 for VME_V45). */
   case VME_GSX32:
   case VME_V452:
      status = Task_InitCrosspage_V45(vm, initParams, 0);
      break;
   case VME_V45:
      status = Task_InitCrosspage_V45(vm, initParams, 1);
      break;

#ifndef VM_X86_64
   case VME_V4:
      status = Task_InitCrosspage_V4(vm, initParams);
      break;
   case VME_GSX251:
   case VME_GSX25:
      status = Task_InitCrosspage<VMCrossPageGSX25>(vm, initParams);
      break;
   case VME_GSX2:
      status = Task_InitCrosspage<VMCrossPageGSX2>(vm, initParams);
      break;
   case VME_V321:
      status = Task_InitCrosspage<VMCrossPageV321>(vm, initParams);
      break;
   case VME_V32:
      status = Task_InitCrosspage<VMCrossPageV32>(vm, initParams);
      break;
   case VME_V3:
      status = Task_InitCrosspage<VMCrossPageV3>(vm, initParams);
      break;
   case VME_V2:
      status = Task_InitCrosspage<VMCrossPageV2>(vm, initParams);
      break;
   case VME_GSX1:
      status = Task_InitCrosspage<VMCrossPageGSX1>(vm, initParams);
      break;
#endif

   default:
      /* Unknown version: status keeps its 0xDEAD marker. */
      break;
   }

   return status;
}

#ifndef VM_X86_64
/*
 *------------------------------------------------------------------------------
 *
 * Task_Switch --
 *
 *      Switches from the host context into the monitor
 *      context. Think of it as a coroutine switch that changes
 *      not only the registers, but also the address space
 *      and all the hardware state. 
 *
 * Results:
 *      Next module call (or user call for that matter) is
 *      returned. 
 *
 * Side effects:
 *      Jump to the other side. Has no direct effect on the
 *      host-visible state except that it might generate an interrupt. 
 *
 *------------------------------------------------------------------------------
 */

template <class VMCrossPage> static void
Task_Switch(VMCrossPage *crosspage,  // IN
            VMDriver *vm)     // IN
{
   DTR32       gdtreg;
   Descriptor *desc;
   uint32      flags, cr0reg, cr2reg, cr4reg;
   uint32      drreg[8];
   SysenterStateV45 sysenterState;
   Selector    gs, fs;
   VA          vAddr;
   uint16 trReg;   
   uint16 ldtreg;
   unsigned int lint;
   uint64 efer;
  
#if defined(linux) && defined(USE_PERFCOUNTERS)
   PerfCtr_SnapshotFromHost();
#endif
   lint = DisableNMI(vm);
   SAVE_FLAGS(flags); 
   CLEAR_INTERRUPTS();

   /*
    * Save CR state (not CR3!)
    */
   GET_CR0(cr0reg);
   GET_CR2(cr2reg);
   GET_CR4(cr4reg);

   /* Ensure global pages are flushed */
   SET_CR4(cr4reg & ~CR4_PGE);

#define SAVE_DR_SHADOW(n) \
                GET_DR##n(drreg[n]); \
                crosspage->setDR(n, drreg[n]);

   SAVE_DR_SHADOW(0);
   SAVE_DR_SHADOW(0);
   SAVE_DR_SHADOW(1);
   SAVE_DR_SHADOW(2);
   SAVE_DR_SHADOW(3);
   SAVE_DR_SHADOW(6);
   SAVE_DR_SHADOW(7);

#undef SAVE_DR_SHADOW

   /*
    * Disable debug exceptions during the switch.  Note: If
    * the DR7 GD bit is on, a trap occurred at the GET_DR0
    * above, and this process is most likely dead.
    */

   if (UNLIKELY(drreg[7] & DR7_ENABLED)) {
      SET_DR7(DR7_DEFAULT);
      crosspage->setDR(7, DR7_DEFAULT);
   }

   GET_GDT(gdtreg); 
   GET_LDT(ldtreg); 

   /*
    * GS and FS are saved outside of the SwitchToMonitor() code to 1)
    * minimize the amount of code handled there and 2) prevent us from
    * faulting if they happen to be in the LDT (since the LDT is saved and
    * restored here too) and 3) make sure that if we do fault (because the
    * uncached descriptor for GS or FS's Selector has become invalid) we
    * fault in the host's context rather than the monitor or, worse,
    * never-never land. --Jeremy.
    */
   gs = GET_GS();
   fs = GET_FS();

   efer = SysenterReturnToMonitorOld(&sysenterState, crosspage->monUsesSysenter);

   /*
    * Mark our task descriptor as unused, or we won't be
    * able to come back to it (SET_TR set the busy bit)
    * 
    */
   
   GET_TR(trReg);
   crosspage->hostContext.tr = trReg;
   if (trReg) {
     vAddr = HOST_KERNEL_LA_2_VA(gdtreg.offset) + trReg;
     desc = (Descriptor *)(vAddr);
     if (Desc_Type(desc) == TASK_DESC_BUSY) {
       Desc_SetType(desc, TASK_DESC);
     }
   }


   /*
    * Save the CS on this side
    */

   *(uint16*) (&crosspage->hostContext.jump[4]) = GET_CS();
   crosspage->hostContext.task.cs = GET_CS();
   
   /*
    * see comment in taskswitch.h 
    */

   SwitchToMonitor_V3(crosspage);

   SysenterReturnFromMonitorOld(&sysenterState, crosspage->monUsesSysenter, efer);
   SET_CR0(cr0reg);
   SET_CR2(cr2reg);
   SET_CR4(cr4reg);
   
   SET_LDT(ldtreg);  
   
   SET_FS(fs);
   SET_GS(gs);
   
#define RESTORE_DR(n) if (crosspage->isDRModified(n, drreg[n])) \
                              SET_DR##n(drreg[n])
   RESTORE_DR(0);
   RESTORE_DR(1);
   RESTORE_DR(2);
   RESTORE_DR(3);
   RESTORE_DR(6);
   RESTORE_DR(7);

#undef RESTORE_DR

   ASSERT_NO_INTERRUPTS();

   if (crosspage->isMCIntr()) {
      /*
       * Note we must do the RAISE_INTERRUPT before ever enabling 
       * interrupts or bad things have happened (might want to know exactly
       * what bad things btw). 
       * Note2; RAISE_INTERRUPT() only takes an constant and hence with switch
       * statement. 
       */
#define IRQ_INT(_x) case _x: RAISE_INTERRUPT(_x); break 
#define IRQ_INT2(_x) IRQ_INT(_x); IRQ_INT(_x + 1)
#define IRQ_INT4(_x) IRQ_INT2(_x); IRQ_INT2(_x + 2)
#define IRQ_INT8(_x) IRQ_INT4(_x); IRQ_INT4(_x + 4)
#define IRQ_INT16(_x) IRQ_INT8(_x); IRQ_INT8(_x + 8)
#define IRQ_INT32(_x) IRQ_INT16(_x); IRQ_INT16(_x + 16)
      switch (crosspage->args[0]) {
	 // These are the general IO interrupts
	 // It would be nice to generate this dynamically, but see Note2 above.
	 IRQ_INT(0x12);
	 IRQ_INT32(0x20);
	 IRQ_INT32(0x40);
	 IRQ_INT32(0x60);
	 IRQ_INT32(0x80);
	 IRQ_INT32(0xa0);
	 IRQ_INT32(0xc0);
	 IRQ_INT32(0xe0);

      default: 
	 /*
	  * XXXX nt
	  * running on a 2 processor machine we hit this Panic with int 0xD1 0x61 ...
	  */
	 Warning("Received Unexpected Interrupt: 0x%X in Task_Switch()\n", crosspage->args[0]);
	 Panic("Received Unexpected Interrupt: 0x%X\n", crosspage->args[0]);
      }
   }
   
   RESTORE_FLAGS(flags);
   RestoreNMI(vm, lint);
#if defined(linux) && defined(USE_PERFCOUNTERS)
   PerfCtr_SnapshotFromMonitor();
#endif
      
   return;
}

/*
 *----------------------------------------------------------------------
 *
 * Vmx86_RunVM  --
 *
 *      Let's move RunVM here from Vmx86... linux/signal.h contains
 *      code which does 'unsigned long int qq = -1', and this causes
 *      warnings... And we can make template products static then...
 *
 *----------------------------------------------------------------------
 */
 
/*
 *----------------------------------------------------------------------
 *
 * Vmx86_RunVM_V4  --
 *
 *      Main interaction between the module and the monitor:
 *
 *         Run the monitor
 *         Process module calls from the monitor
 *         Make cross user calls to the main thread
 *         Return to userlevel to process normal user calls
 *         and to signal timeout or errors.
 *
 *      vm is the driver instance; vcpuid selects the cross page
 *      (vm->crosspage[vcpuid]) this invocation operates on.  The
 *      cross page is accessed through the VMCrossPage template
 *      parameter.
 *
 *      If an earlier invocation returned USERCALL_RESTART it left
 *      crosspage->moduleCallInterrupted set.  In that case the flag is
 *      cleared and the first switch to the monitor is skipped, so the
 *      requests still recorded in the cross page are processed again.
 *
 * Results:
 *      Positive: user call number (crosspage->userCallType of a
 *                non-cross user call).
 *      USERCALL_RESTART: a semaphore wait/signal module call reported
 *                MX_WAITINTERRUPTED.
 *	-1: error (currently not used).
 *
 * Side effects:
 *      Switches to the monitor and back.  Updates the cross page
 *      (retval, userCallRequest, moduleCallInterrupted) and calls
 *      HostIF_Reschedule() once per loop iteration.
 *
 *----------------------------------------------------------------------
 */

template <class VMCrossPage> static int
Vmx86_RunVM_V4(VMDriver *vm, Vcpuid vcpuid)
{
   /* Reply handed to the monitor (via crosspage->retval) on the next switch. */
   uint32 retval = MODULECALL_V4_USERRETURN;
   VMCrossPage *crosspage = (VMCrossPage*)vm->crosspage[vcpuid];

   ASSERT(crosspage);

   /*
    * Check if we were interrupted by signal.
    *
    * If so, jump straight into the loop body behind the task switch;
    * the cross page still holds the requests of the interrupted pass.
    */
   if (crosspage->moduleCallInterrupted) {
      crosspage->moduleCallInterrupted = FALSE;
      goto skipTaskSwitch;
   }
   for (;;) {

      /* Publish the result of the previous request before switching. */
      crosspage->retval = retval;

      /*
       * Task_Switch changes the world to the monitor.
       * The monitor is waiting in the BackToHost routine.
       */
      UCTIMESTAMP(crosspage, SWITCHING_TO_MONITOR);
      Task_Switch(crosspage, vm);
      UCTIMESTAMP(crosspage, SWITCHED_TO_MODULE);

skipTaskSwitch:;

      /* Default reply unless one of the handlers below overrides it. */
      retval = MODULECALL_V4_USERRETURN;

      if (crosspage->userCallType != MODULECALL_USERCALL_NONE) {
         /*
          * This is the main user call path.
          *
          * There are two kinds of user calls.  Normal ones
          * are handled by the calling VCPU thread itself.
          * We just return from here (back to userlevel)
          * in that case.
          *
          * Calls marked userCallCross are handled by the main
          * VMX thread.  In this case, the userCallRequest field
          * indicates to the VMX that this VCPU wants to make
          * a user call.  This field may be consulted by the VMX
          * at any time (specifically when the VMX is awakened by
          * another VCPU), so it must be set after the other
          * user call arguments.  The VMX is responsible for
          * resetting this field and awakening the VCPU when
          * the user call is complete, via the ACK_USER_CALL
          * and COMPLETE_USER_CALL ioctl.  The latter implies
          * the former.
          *
          * When and how to use ACK_USER_CALL and COMPLETE_USER_CALL
          * are at the discretion of the VMX.  In particular,
          * COMPLETE_USER_CALL does not imply that the requested
          * operation has fully completed, only that the VCPU can
          * continue.  See the comment in MonitorLoopHandleUserCall()
          * for use scenarios.
          *
          * See also comment at MonitorLoopUserCallPoll().
          *
          * -- edward
          */

         /* Normal user call: hand it to userlevel on this thread. */
         if (!crosspage->userCallCross) {
            ASSERT(!crosspage->userCallRestart);
            return crosspage->userCallType;
         }

	 /*
	  * Cross user call.  Post the request and notify via
	  * HostIF_UserCall() only when this is not a restart.
	  */
	 if (!crosspage->userCallRestart) {
	    ASSERT(crosspage->userCallRequest == MODULECALL_USERCALL_NONE);
	    crosspage->userCallRequest = crosspage->userCallType;
	    UCTIMESTAMP(crosspage, AWAKENING_VMX);
	    HostIF_UserCall(vm, vcpuid);
	 }

	 /*
	  * Wait for the user call to be released.  A false return from
	  * HostIF_UserCallWait() is reported to the monitor as a timeout.
	  */
	 UCTIMESTAMP(crosspage, GOING_TO_SLEEP);
         if (HostIF_UserCallWait(vm, vcpuid, USERCALL_TIMEOUT)) {
	    ASSERT(crosspage->userCallRequest == MODULECALL_USERCALL_NONE);
	 } else {
	    retval = MODULECALL_V4_USERTIMEOUT;
	 }
	 UCTIMESTAMP(crosspage, AWAKE);
      }

      /* Service the module call the monitor left in the cross page. */
      switch (crosspage->moduleCallType) {

      case MODULECALL_V4_NONE:
         break;

      case MODULECALL_V4_INTR:    // Already done in task.c
         break;

      case MODULECALL_V4_ISMPNLOCKED : {
         MPN32 mpn = crosspage->args[0];
         retval = HostIF_IsMPNLocked(vm, mpn);
         break;
      }

      case MODULECALL_V4_LOCKPAGE: {
         void *addr = (void *)crosspage->args[0];
	 ASSERT(sizeof retval == sizeof (MPN32));
         retval = Vmx86_LockPage(vm, addr, FALSE);
	 if (PAGE_LOCK_SUCCESS(retval)) {
            /*
             * A non-zero result from HostIF_MarkPageDirty() is treated
             * as failure: the page is unlocked again and, if that
             * unlock succeeds, PAGE_LOCK_TOUCH_FAILED is reported;
             * otherwise the unlock error is reported.
             */
            if (HostIF_MarkPageDirty(vm, addr)) {
               retval = Vmx86_UnlockPage(vm, addr);
               if (PAGE_LOCK_SUCCESS(retval)) {
                  retval = PAGE_LOCK_TOUCH_FAILED;
               }
            }
         }
         break;
      }

      case MODULECALL_V4_GET_RECYCLED_PAGE:
	 retval = Vmx86_GetRecycledPage(vm);
	 break;

      case MODULECALL_V4_SEMAWAIT:
         retval = HostIF_SemaphoreWait(vm, vcpuid, crosspage->args[0], USERCALL_TIMEOUT);
	 /*
	  * Interrupted: remember that, so the next invocation skips the
	  * task switch and dispatches this module call again.
	  */
	 if (retval == MX_WAITINTERRUPTED) {
	    crosspage->moduleCallInterrupted = TRUE;
	    return USERCALL_RESTART;
	 }
         break;
      case MODULECALL_V4_SEMASIGNAL:
         retval = HostIF_SemaphoreSignal(crosspage->args[0]);
	 /* Same restart protocol as for MODULECALL_V4_SEMAWAIT. */
	 if (retval == MX_WAITINTERRUPTED) {
	    crosspage->moduleCallInterrupted = TRUE;
	    return USERCALL_RESTART;
	 }
         break;
      case MODULECALL_V4_SEMAFORCEWAKEUP: {
         HostIF_SemaphoreForceWakeup(vm, (Vcpuid) crosspage->args[0]);
         break;
      }
      case MODULECALL_V4_IPI:
         /* NOTE(review): ~0 presumably selects every VCPU -- confirm against HostIF_IPI(). */
         retval = HostIF_IPI(vm, ~0, TRUE);
         break;

      case MODULECALL_V4_UNLOCKPAGE: {
         void *addr = (void *)crosspage->args[0];
         Bool dirty = (Bool)crosspage->args[1];

         /*
          * When the monitor asks for it (args[1]), mark the page dirty
          * first; the page stays locked if that fails.
          */
         if (dirty) {
	    if (HostIF_MarkPageDirty(vm, addr)) {
	       retval = PAGE_UNLOCK_TOUCH_FAILED;
	    } else {
	       retval = Vmx86_UnlockPage(vm, addr);
	    }
         } else {
	    retval = Vmx86_UnlockPage(vm, addr);
	 }
         break;
      }

      case MODULECALL_V4_RELEASE_ANON_PAGE: {
         MPN32 mpn = (MPN32)crosspage->args[0];
	 retval = Vmx86_ReleaseAnonPage(vm, mpn);
         break;
      }

      default:
         /* Unknown request: only logged; the loop continues with the default reply. */
         Warning("ModuleCall %d not supported\n", crosspage->moduleCallType);
      }

      /*
       * Give other threads and processes a chance.
       *
       * This is important when we are not preemptable while
       * in the driver and the monitor (e.g., Linux).
       */

      HostIF_Reschedule();

   }

   /* The loop above is only ever left through a return statement. */
   NOT_REACHED();
}


/*
 *----------------------------------------------------------------------
 *
 * Vmx86_RunVM  --
 *
 *      Return to the monitor.
 *      Main interaction between the module and the monitor.
 *      Leaves the loop only to go back to user mode.
 *      Every interation of the loop switches back to the monitor
 *
 * Results:
 *      Returns the MODULECALL that forced it to leave the loop.
 *      back to the IOCTL handler of the module device driver
 *
 * Side effects:
 *      Dispatches the messages, everything can change
 *
 *----------------------------------------------------------------------
 */
template <class VMCrossPage> static int
Vmx86_RunVM(VMCrossPage *crosspage, VMDriver *vm)
{
   ModuleCallTypeV3 op;
   uint32 retval;

   ASSERT(vm && crosspage);

   retval = MODULECALL_V3_USERRETURN;

   while(1) {

      crosspage->retval = retval;

      /*
       * Task_Switch changes the world to the monitor.
       * The monitor is waiting in the BackToHost routine
       */
      Task_Switch(crosspage, vm);

      op = crosspage->moduleCallType;

      retval = MODULECALL_USER_START;

      if (op >  MODULECALL_USER_START&& op < MODULECALL_USER_END) {
         return op;
      }


      switch (op) {
      case MODULECALL_V3_NONE :
         break;


      case MODULECALL_V3_INTR : {
	 /*
	  * Note that as of SMP support, irq is the actual Interrupt number
	  * rather than the IRQ, b/c the IO APIC redirects IRQs every which
	  * way. --Jeremy.
	  */
#ifdef notdef
	 if (crosspage->args[1] == MODULECALL_USERCALL_NONE) {
	    /* nothing to be done if MODULECALL_V3_USERRETURN */
	    retval = MODULECALL_V3_USERRETURN;
	    break;
	 }
#endif
	 return crosspage->args[1];
      }
      case MODULECALL_V3_ISMPNLOCKED : {
         MPN mpn = crosspage->args[0];
         retval = HostIF_IsMPNLocked(vm, mpn);
         break;
      }

      case MODULECALL_V3_LOCKPAGE: {
         char dummy;
         void *addr = (void *)crosspage->args[0];
	 ASSERT(sizeof retval == sizeof (MPN));
         retval = Vmx86_LockPage(vm, addr, FALSE);
	 if (PAGE_LOCK_SUCCESS(retval)) {
            if (HostIF_CopyFromUser(&dummy, addr, 1) == 0) {
               HostIF_CopyToUser(addr, &dummy, 1);
            } else {
               retval = Vmx86_UnlockPage(vm, addr);
	       if (PAGE_LOCK_SUCCESS(retval)) {
		  retval = PAGE_LOCK_TOUCH_FAILED;
	       }
            }
         }
         break;
      }

      case MODULECALL_V3_UNLOCKPAGE: {
         char dummy;
         void *addr = (void *)crosspage->args[0];
         Bool dirty = (Bool)crosspage->args[1];

         if (dirty) {
            if (HostIF_CopyFromUser(&dummy, addr, 1) != 0 ||
                HostIF_CopyToUser(addr, &dummy, 1) != 0) {
	       retval = PAGE_UNLOCK_TOUCH_FAILED;
            } else {
	       retval = Vmx86_UnlockPage(vm, addr);
	    }
         } else {
            retval = Vmx86_UnlockPage(vm, addr);
         }
         break;
      }

      default:
         Panic("ModuleCallLoop %d not supported \n",op);
      }
   }

   ASSERT(op >= MODULECALL_USER_START && op < MODULECALL_USER_END);
   return op;
}
#endif

/*
 *----------------------------------------------------------------------
 *
 * Vmx86_RunVM --
 *
 *      Version dispatcher: selects the RunVM implementation matching
 *      vm->vmVersion and forwards to it.
 *
 *      The V2 .. GSX25 implementations receive the VCPU's cross page
 *      directly, cast to the layout of that version; V4 and all later
 *      implementations receive (vm, vcpuid).  Everything up to and
 *      including V4 is compiled out when VM_X86_64 is defined.
 *
 * Results:
 *      Whatever the selected implementation returns, or -EINVAL when
 *      vm->vmVersion is not one of the versions handled here.
 *
 * Side effects:
 *      Those of the selected implementation.
 *
 *----------------------------------------------------------------------
 */
int
Vmx86_RunVM(VMDriver *vm, Vcpuid vcpuid)
{
#ifndef VM_X86_64
   /* Untyped view of this VCPU's cross page for the pre-V4 layouts. */
   void *const legacyPage = vm->crosspage[vcpuid];
#endif

   switch (vm->vmVersion) {
   /* Implementations that look up the cross page themselves. */
   case VME_TOT:
      return Vmx86_RunVM_TOT(vm, vcpuid);

   case VME_S1B1:
      return Vmx86_RunVM_S1B1(vm, vcpuid);

   case VME_V55:
      return Vmx86_RunVM_V55(vm, vcpuid);

   case VME_V5:
      return Vmx86_RunVM_V5(vm, vcpuid);

   case VME_GSX32:
   case VME_V452:
   case VME_V45:
      return Vmx86_RunVM_V45(vm, vcpuid);

#ifndef VM_X86_64
   case VME_V4:
      return Vmx86_RunVM_V4<VMCrossPageV4>(vm, vcpuid);

   /* Older layouts: the template is instantiated per cross page type. */
   case VME_GSX25:
   case VME_GSX251:
      return Vmx86_RunVM(static_cast<VMCrossPageGSX25 *>(legacyPage), vm);

   case VME_GSX2:
      return Vmx86_RunVM(static_cast<VMCrossPageGSX2 *>(legacyPage), vm);

   case VME_V32:
      return Vmx86_RunVM(static_cast<VMCrossPageV32 *>(legacyPage), vm);

   case VME_V321:
      return Vmx86_RunVM(static_cast<VMCrossPageV321 *>(legacyPage), vm);

   case VME_V3:
      return Vmx86_RunVM(static_cast<VMCrossPageV3 *>(legacyPage), vm);

   case VME_GSX1:
      return Vmx86_RunVM(static_cast<VMCrossPageGSX1 *>(legacyPage), vm);

   case VME_V2:
      return Vmx86_RunVM(static_cast<VMCrossPageV2 *>(legacyPage), vm);
#endif

   default:
      break;
   }

   /* Unknown or unsupported monitor version. */
   return -EINVAL;
}

/*
 *----------------------------------------------------------------------
 *
 * CheckPadding --
 *
 *      check for expected padding --
 *      this check currently fails on the egcs compiler
 *
 * Results:
 *
 *      TRUE if the check succeeds -- module will be loaded
 *
 *
 *
 * Side effects:
 *      output to kernel log on error
 *      Structure size check may fail at build time, not at load time.
 *
 *----------------------------------------------------------------------
 */

/*
 * VM_CHECKSIZE(structure, expected) --
 *
 *      Build-time ABI check: if sizeof(structure) differs from
 *      expected, an inline asm statement is emitted whose assembler
 *      directives print a diagnostic (.print) and stop assembly
 *      (.abort).  Both values are passed as immediate ("i") operands,
 *      so they must be compile-time constants.
 *
 *      Wrapped in do { } while (0) so that the macro behaves as a
 *      single statement (safe in an unbraced if/else) and requires the
 *      trailing semicolon.
 */
#define VM_CHECKSIZE(structure, expected)       \
   do {                                         \
      if (sizeof(structure) != (expected)) {    \
         asm volatile(".print \"Unexpected ABI change: Structure " #structure " should take %c0 bytes, but took %c1!\"; .abort" : : "i"(expected), "i"(sizeof(structure))); \
      }                                         \
   } while (0)

/*
 * VM_LACHECK(structure, expected) --
 *
 *      Same mechanism as VM_CHECKSIZE, but verifies that the
 *      crosspageLinearAddr member of structure sits at byte offset
 *      expected.
 */
#define VM_LACHECK(structure, expected)                                         \
   do {                                                                         \
      if (offsetof(structure, crosspageLinearAddr) != (expected)) {             \
         asm volatile(".print \"Unexpected ABI change: Structure " #structure " should have crosspageLinearAddr at offset %c0, but has it at %c1!\"; .abort" : : \
                      "i"(expected), "i"(offsetof(structure, crosspageLinearAddr))); \
      }                                                                         \
   } while (0)

int
Task_CheckPadding(void)
{
   DTRWords32 dtr;
   uint16 *x;
   int bad = 0;

   memset(&dtr, 0, sizeof dtr);
   dtr.dtr.limit = 0x1111;
   dtr.dtr.offset = 0x22223333;

   x = (uint16*)&dtr;

   if (x[0] == 0x1111 && x[1] == 0x3333 && x[2] == 0x2222) {
   } else {
      Warning("DTR padding\n");
      bad = 1;
   }

   VM_CHECKSIZE(VMCrossPageTOT,   MODULECALL_CROSSPAGE_SIZE_TOT);
   VM_CHECKSIZE(VMCrossPageV5,    MODULECALL_CROSSPAGE_SIZE_V5);
   VM_CHECKSIZE(VMCrossPageV45,   MODULECALL_CROSSPAGE_SIZE_V45);
   VM_LACHECK(VMCrossPageV45,     MODULECALL_CROSSPAGE_LAOFFSET_V45);
   VM_CHECKSIZE(VMCrossPageV4,    MODULECALL_CROSSPAGE_SIZE_V4);
   VM_LACHECK(VMCrossPageV4,      MODULECALL_CROSSPAGE_LAOFFSET_V4);
   VM_CHECKSIZE(VMCrossPageGSX25, MODULECALL_CROSSPAGE_SIZE_GSX25);
   VM_LACHECK(VMCrossPageGSX25,   MODULECALL_CROSSPAGE_LAOFFSET_GSX25);
   VM_CHECKSIZE(VMCrossPageGSX2,  MODULECALL_CROSSPAGE_SIZE_GSX2);
   VM_LACHECK(VMCrossPageGSX2,    MODULECALL_CROSSPAGE_LAOFFSET_GSX2);
   VM_CHECKSIZE(VMCrossPageV321,  MODULECALL_CROSSPAGE_SIZE_V321);
   VM_LACHECK(VMCrossPageV321,    MODULECALL_CROSSPAGE_LAOFFSET_V321);
   VM_CHECKSIZE(VMCrossPageV32,   MODULECALL_CROSSPAGE_SIZE_V32);
   VM_LACHECK(VMCrossPageV32,     MODULECALL_CROSSPAGE_LAOFFSET_V32);
   VM_CHECKSIZE(VMCrossPageV3,    MODULECALL_CROSSPAGE_SIZE_V3);
   VM_LACHECK(VMCrossPageV3,      MODULECALL_CROSSPAGE_LAOFFSET_V3);
   VM_CHECKSIZE(VMCrossPageV2,    MODULECALL_CROSSPAGE_SIZE_V3);
   VM_LACHECK(VMCrossPageV2,      MODULECALL_CROSSPAGE_LAOFFSET_V3);
   VM_CHECKSIZE(VMCrossPageGSX1,  MODULECALL_CROSSPAGE_SIZE_V3);
   VM_LACHECK(VMCrossPageGSX1,    MODULECALL_CROSSPAGE_LAOFFSET_V3);

   if (!bad) {
      return TRUE;
   }

   printk("/dev/vmmon: Cannot load module. Use standard gcc compiler\n");
   return FALSE;
}
