/* **********************************************************
 * Copyright 1998 VMware, Inc.  All rights reserved. -- VMware Confidential
 * **********************************************************/

/*
 * task.c --
 *
 *      Task initialization and switching routines between the host
 *      and the monitor. 
 *     
 *      A task switch: 
 *          -saves the EFLAGS,CR0,CR2,CR4, and IDT
 *          -jumps to code on the shared page
 *              which saves the registers, GDT and CR3
 *              which then restores the registers, GDT and CR3
 *          -restores the IDT,CR0,CR2,CR4 and EFLAGS
 *                
 *      This file is pretty much independent of the host OS.
 *      
 */

extern "C" {
/*
 * Prevent asm/page.h inclusion on i386... Hackity hack, but apparently
 * we do not need asm/page.h anymore in this file - not surprising, this
 * is common file, yes?  And Windows do not have page.h, do they?
 */
#define _I386_PAGE_H
/* On Linux, must come before any inclusion of asm/page.h --hpreg */
#include "hostKernel.h"
#ifdef linux
#   include <linux/string.h>
#   include <linux/kernel.h>

#   ifdef USE_PERFCTRS_HOSTED
#      include "perfctr.h"
#   endif

#   define EXPORT_SYMTAB
#else
#   include <string.h>
#endif

#include "vmware.h"
#include "modulecall.h"
#include "vmx86.h"
#include "task.h"
#include "vm_asm.h"
#include "cpuid.h"
#include "hostif.h"
}
#include "comport.h"

/* gcc 3.4.0 cannot handle always_inline in templates */
#if __GNUC__ == 3 && __GNUC_MINOR__ == 4
#define TEMPLATE_INLINE_SINGLE_CALLER /* I'm giving up... Please do not use gcc 3.4 if you want stable compiler */
#define TEMPLATE_INLINE /* You known that gcc 3.4 is piece of crap, yes? */
#else
#define TEMPLATE_INLINE_SINGLE_CALLER INLINE_SINGLE_CALLER
#define TEMPLATE_INLINE INLINE
#endif

#if defined(_WIN64)
#include "vmmon-asm-x86-64.h"
#define USE_TEMPORARY_GDT 1
#else
#define USE_TEMPORARY_GDT 0
#endif

#include "vm_asm_x86_64.h"

#if defined(__APPLE__)
//#include <i386/seg.h> can't find mach_kdb.h
#define KERNEL64_CS 0x80
#else
#define KERNEL64_CS 0
#endif

/*
 * TS_ASSERT --
 *      Assertion used by the task-switch code.  The check is wrapped in
 *      DEBUG_ONLY(); when the condition is false it calls TaskAssertFail()
 *      with the current source line.
 */
#define TS_ASSERT(t) do { \
   DEBUG_ONLY(if (!(t)) TaskAssertFail(__LINE__);)  \
} while (0)

/* Receives the LVT read-back performed in DisableNMIDelivery(). */
static uint32 dummyLVT;
/*
 * Array of temporary GDT buffers, one per CPU index; allocated in
 * Task_Initialize() and released in Task_Terminate().
 */
static Descriptor **tempGDT;
/*
 * Results of Vmx86_InCompatMode() / Vmx86_InLongMode(), captured once in
 * Task_Initialize() and consulted via TaskInCompatMode()/TaskInLongMode().
 */
static Bool inCompatMode, inLongMode;

/*
 * TaskInCompatMode --
 *
 *      Report whether the host is treated as running in compatibility
 *      mode.  Only builds with __APPLE__ defined consult the value cached
 *      by Task_Initialize(); every other build answers FALSE.
 */
static INLINE Bool
TaskInCompatMode(void)
{
#if !defined(__APPLE__)
   /* Non-Apple builds: constant answer. */
   return FALSE;
#else
   return inCompatMode;
#endif
}

/*
 * TaskInLongMode --
 *
 *      Report whether the host is treated as running in long mode.
 *      Builds with __APPLE__ defined use the value cached by
 *      Task_Initialize(); every other build returns vm_x86_64.
 */
static INLINE Bool
TaskInLongMode(void)
{
#if !defined(__APPLE__)
   return vm_x86_64;
#else
   return inLongMode;
#endif
}

#ifdef VMX86_DEBUG
/*
 * TaskAssertFail --
 *
 *      Failure handler invoked by TS_ASSERT (compiled only when
 *      VMX86_DEBUG is defined).  Emits "TaskAssertFail*: <line>" through
 *      the CP_* output routines and then loads 0 into CR3.
 */
static void
TaskAssertFail(int line)
{
  CP_PutStr("TaskAssertFail*: ");
  CP_PutDec(line);
  CP_PutCrLf();
  /* NOTE(review): presumably intended to stop the machine at once -- confirm. */
  SET_CR3(0);
}
#endif


/*
 *-----------------------------------------------------------------------------
 *
 * Task_Terminate --
 *
 *      Called at driver unload time.  Undoes whatever Task_Initialize did.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Release temporary GDT memory.
 *
 *-----------------------------------------------------------------------------
 */

void
Task_Terminate(void)
{
   if (USE_TEMPORARY_GDT) {
      const unsigned cpus = HostIF_NumOnlineLogicalCPUs();
      unsigned       i;
      
      for (i = 0; i < cpus; ++i) {
         HostIF_FreeKernelMem(tempGDT[i]);
      }
      HostIF_FreeKernelMem(tempGDT);
   }
}


/*
 *-----------------------------------------------------------------------------
 *
 * Task_Initialize --
 *
 *      Called at driver load time to initialize module's static data.
 *
 * Results:
 *      TRUE iff initialization successful.
 *
 * Side effects:
 *      Temporary GDT memory allocated and initialized.
 *
 *-----------------------------------------------------------------------------
 */

Bool
Task_Initialize(void)
{
   /*
    * Fixes relative to the previous version:
    *  - tempGDT is reset to NULL after the failure-path free, so it no
    *    longer dangles if Task_Terminate() or other code looks at it later.
    *  - the two Warning() messages now end in '\n' like every other
    *    Warning() in this file.
    */
#ifdef VMX86_DEBUG
   CP_Init();
#endif
   /* Cache the host mode; read back via TaskInCompatMode()/TaskInLongMode(). */
   inCompatMode = Vmx86_InCompatMode();
   inLongMode   = Vmx86_InLongMode();

   if (USE_TEMPORARY_GDT) {
      const unsigned cpus        = HostIF_NumOnlineLogicalCPUs();
      const unsigned numPtrBytes = cpus * sizeof(Descriptor *);
      unsigned       i;
      
      /* Some linux kernels panic when allocating > 128Kb */
      ASSERT(numPtrBytes <= 131072);

      /* One pointer slot per online logical CPU. */
      tempGDT = (Descriptor**)HostIF_AllocKernelMem(numPtrBytes, TRUE);
      if (tempGDT == NULL) {
         Warning("Task_Initialize: Unable to allocate space for temporary GDT "
                 "pointers\n");
         return FALSE;
      }

      for (i = 0; i < cpus; ++i) {
         const unsigned bytes = 0x10000; /* maximal GDT size */

         tempGDT[i] = (Descriptor*)HostIF_AllocKernelMem(bytes, TRUE);
         if (tempGDT[i] == NULL) {
            unsigned int j;

            Warning("Task_Initialize: Unable to allocate space "
                    "for temporary GDT[%u]\n", i);
            /* Unwind: release the buffers obtained so far, then the array. */
            for (j = 0; j < i; ++j) {
               HostIF_FreeKernelMem(tempGDT[j]);
            }
            HostIF_FreeKernelMem(tempGDT);
            tempGDT = NULL;
            return FALSE;
         }
      }
   }

   return TRUE;
}


/*
 *-----------------------------------------------------------------------------
 *
 * SetupTemporaryGDT_TOT --
 *
 *      Set up a temporary GDT so that the TSS 'busy bit' can be
 *      changed without affecting the host's data structures.
 *
 * Results:
 *      
 *      The host's GDT is copied (or partially copied) to the
 *      dynamically allocated temporary GDT.
 *
 *      The hostContext in the crosspage is set up to reference the
 *      host GDT or the temporary GDT.
 *
 * Side effects:
 *
 *      Crosspage modified.
 *
 * Notes:
 *
 *      An OS which checks critical data structures, such as the GDT,
 *      can fail when this module changes the TSS busy bit in the host
 *      GDT.  To avoid this problem, we use a sparse copy of the host
 *      GDT to perform the manipulation of the TSS busy bit.
 *
 *      See PR8144.
 *
 *-----------------------------------------------------------------------------
 */
template <class VMCrossPage>
static TEMPLATE_INLINE_SINGLE_CALLER void
SetupTemporaryGDT_TOT(uint32 pcpuid, VMCrossPage *crosspage,
                      DTR64 hostGDT64, Selector ldt, Selector cs, Selector tr)
{
   /*
    * pcpuid    - index into tempGDT[] (the caller passes the current CPU).
    * crosspage - crosspage whose host GDTR fields are filled in.
    * hostGDT64 - host GDT base/limit as read by the caller.
    * ldt/cs/tr - host selectors; used only by the sparse-copy path.
    */
   LA64 hostGDTBaseLA;

   if (USE_TEMPORARY_GDT) {
      const Bool COPY_WHOLE_GDT = TRUE;
      const VA   hostGDTVA      = HOST_KERNEL_LA_2_VA(hostGDT64.offset);

      /*
       * NOTE(review): looks like a guard against the conversion result
       * being truncated when stored in a VA -- confirm.
       */
      ASSERT(hostGDTVA == HOST_KERNEL_LA_2_VA(hostGDT64.offset));      
      ASSERT((tr & SELECTOR_RPL_MASK) == 0);

      if (COPY_WHOLE_GDT) {
         /* limit is the offset of the last valid byte, hence the + 1. */
         memcpy(tempGDT[pcpuid], (void *)hostGDTVA, hostGDT64.limit + 1); // remove string.h
      } else {
         const unsigned size = sizeof(Descriptor);
         const Selector ds   = SELECTOR_CLEAR_RPL(GET_DS()); 
         const Selector es   = SELECTOR_CLEAR_RPL(GET_ES());
         const Selector ss   = SELECTOR_CLEAR_RPL(GET_SS());

         /*
          * Ldt and tr descriptors use two entries (64-bits wide) in 64-bit
          * mode.  The second entry lives one whole Descriptor further on,
          * so the source address advances by 'size' bytes.  (This used to
          * add 1, which read from one byte past the first entry.)
          */
         tempGDT[pcpuid][cs  / size]     = *(Descriptor *)(hostGDTVA + cs);
         tempGDT[pcpuid][ds  / size]     = *(Descriptor *)(hostGDTVA + ds);
         tempGDT[pcpuid][es  / size]     = *(Descriptor *)(hostGDTVA + es);
         tempGDT[pcpuid][ss  / size]     = *(Descriptor *)(hostGDTVA + ss);
         tempGDT[pcpuid][ldt / size]     = *(Descriptor *)(hostGDTVA + ldt);
         tempGDT[pcpuid][ldt / size + 1] = *(Descriptor *)(hostGDTVA + ldt + size);
         tempGDT[pcpuid][tr  / size]     = *(Descriptor *)(hostGDTVA + tr);
         tempGDT[pcpuid][tr  / size + 1] = *(Descriptor *)(hostGDTVA + tr + size);
      }
      /* Set up cross page so temporary GDT will be used when
       * returning from the monitor. */
      hostGDTBaseLA = (LA)HOST_KERNEL_VA_2_LA(tempGDT[pcpuid]);
   } else {
      /* No temporary GDT: the crosspage refers to the host's own GDT. */
      hostGDTBaseLA = hostGDT64.offset;
   }

   /* Publish base/limit in whichever host context matches the host mode. */
   if (TaskInLongMode()) {
      crosspage->hostContext64.gdtr.dtr.limit  = hostGDT64.limit;
      crosspage->hostContext64.gdtr.dtr.offset = hostGDTBaseLA;
   } else {
      /* The 32-bit context stores a 32-bit base, so it must fit. */
      TS_ASSERT(hostGDTBaseLA <= 0xFFFFFFFFU);
      crosspage->hostContext.gdtr.dtr.limit  = hostGDT64.limit;
      crosspage->hostContext.gdtr.dtr.offset = (uint32)hostGDTBaseLA;
   }
}


/*
 *------------------------------------------------------------------------------
 *
 * Task_InitCrosspage  --
 *
 *    Initialize the crosspage used to switch to the monitor task. 
 *
 * Results:
 *    0 on success
 *    != 0 on failure
 *
 * Side effects:
 *    None
 *  
 *------------------------------------------------------------------------------
 */

template <class VMCrossPage> static int 
Task_InitCrosspage_TOT(VMDriver *vm,          // IN
                       InitBlock *initParams) // IN: Initial params from the VM 
{
   Vcpuid cpu = 0;

   ASSERT(sizeof(VMCrossPage) < PAGE_SIZE);
   ASSERT(MODULECALL_CROSS_PAGE_LEN == 1);

   /*
    * For each VCPU: map its crosspage, look up the backing machine page,
    * validate the version and size stamped in the page, then fill in the
    * host-owned fields and record the mapping in the VMDriver.
    */
   while (cpu < initParams->numVCPUs) {
      LA           mappedLA;
      void        *userAddr = (void *)(uintptr_t)initParams->crosspage[cpu];
      VMCrossPage *xpage    =
         (VMCrossPage *)HostIF_MapCrossPage(vm, userAddr, &mappedLA);
      MPN          xpageMPN;

      if (xpage == NULL) {
         return 1;
      }

      xpageMPN = HostIF_LookupUserMPN(vm, userAddr);
      if ((int64)xpageMPN <= 0) {
         return 1;
      }

      {
         /*
          * The version must occupy the first four bytes of the crosspage
          * (see the declaration of VMCrossPage in modulecall.h).
          */
         ASSERT_ON_COMPILE(offsetof(VMCrossPage, version) == 0);
         ASSERT_ON_COMPILE(sizeof(xpage->version) == sizeof(uint32));

         /* xpage->version is VMX's version; CROSSPAGE_VERSION is vmmon's. */
         if (xpage->version != CROSSPAGE_VERSION_TOT) {
            Warning("crosspage version mismatch: "
                    "vmmon claims %#x, must match vmx version of %#x.\n",
                    CROSSPAGE_VERSION_TOT, xpage->version);
            return 1;
         }
      }

      {
         /*
          * crosspage_size must directly follow the version and be a
          * uint32, as defined by the vmm/vmx.
          */
         ASSERT_ON_COMPILE(offsetof(VMCrossPage, crosspage_size) ==
                           sizeof(uint32));
         ASSERT_ON_COMPILE(sizeof(xpage->crosspage_size) == sizeof(uint32));

         if (xpage->crosspage_size != sizeof(VMCrossPage)) {
            Warning("crosspage size mismatch: "
                    "vmmon claims %#x bytes, must match vmm size of %#x bytes.\n",
                    (int)sizeof(VMCrossPage), xpage->crosspage_size);
            return 1;
         }
      }

      /* Addresses of the crosspage itself. */
      xpage->crosspageMA     = MPN_2_MA(xpageMPN);
      xpage->hostCrossPageLA = (LA64)(uintptr_t)xpage;

      /* Initial values for the remaining host-initialized fields. */
      xpage->irqRelocateOffset[0]  = IRQ_HOST_INTR1_BASE;
      xpage->irqRelocateOffset[1]  = IRQ_HOST_INTR2_BASE;
      xpage->userCallRequest       = MODULECALL_USERCALL_NONE;
      xpage->moduleCallInterrupted = FALSE;
      xpage->pseudoTSCConv.mult    = 1;
      xpage->pseudoTSCConv.shift   = 0;
      xpage->pseudoTSCConv.add     = 0;

      vm->crosspage[cpu] = xpage;

      cpu++;
   }

   return 0;
}


/*
 * DisableNMIDelivery --
 *
 *      If the LVT register at regPtr is set to NMI delivery mode and is
 *      not masked, mask it.
 *
 * Results:
 *      TRUE if this call masked the register, FALSE if it was left alone.
 */
static Bool
DisableNMIDelivery(volatile uint32 *regPtr) // IN/OUT
{
   const uint32 lvt = *regPtr;

   if (APIC_LVT_DELVMODE(lvt) != APIC_LVT_DELVMODE_NMI) {
      return FALSE;
   }
   if (APIC_LVT_ISMASKED(lvt)) {
      return FALSE;
   }

   *regPtr  = lvt | APIC_LVT_MASK;
   dummyLVT = *regPtr; // Force completion of masking, Bug 78470.
   return TRUE;
}

/*
 * Bit flags recording which LVT entries were masked by DisableNMI2(), so
 * that RestoreNMI() unmasks only those.
 */
#define NMI_LINT0	(0x01)
#define NMI_LINT1	(0x02)
#define NMI_TIMER	(0x04)
#define NMI_ERR		(0x08)
#define NMI_PC		(0x10)
#define NMI_THERM	(0x20)

/*
 * DisableNMI2 --
 *
 *      Make one pass over the host local APIC's LVT registers, masking
 *      each one that DisableNMIDelivery() reports as unmasked and in NMI
 *      delivery mode.  Does nothing when vm->hostAPIC is not set.
 *
 * Results:
 *      Bitwise OR of the NMI_* flags for the registers masked by this pass.
 */
static unsigned int
DisableNMI2(VMDriver *vm)   // IN
{
   unsigned int nmi = 0;
   if (vm->hostAPIC) {
      /*
       * The cases fall through on purpose: a larger APIC_MAX_LVT value
       * handles its own register and then every register below it.
       */
      switch (APIC_MAX_LVT(vm->hostAPIC)) {
         default:
         case 5:
	    if (DisableNMIDelivery(&APIC_THERM_REG(vm->hostAPIC))) {
	       nmi |= NMI_THERM;
	    }
	    /* fallthrough */
	 case 4:
	    if (DisableNMIDelivery(&APIC_PC_REG(vm->hostAPIC))) {
	       nmi |= NMI_PC;
	    }
	    /* fallthrough */
	 case 3:
	 case 2:
	 case 1:
	 case 0:
	    /* These four registers are handled for every APIC_MAX_LVT value. */
	    if (DisableNMIDelivery(&APIC_LINT0_REG(vm->hostAPIC))) {
	       nmi |= NMI_LINT0;
	    }
	    if (DisableNMIDelivery(&APIC_LINT1_REG(vm->hostAPIC))) {
	       nmi |= NMI_LINT1;
	    }
	    if (DisableNMIDelivery(&APIC_TIMER_REG(vm->hostAPIC))) {
	       nmi |= NMI_TIMER;
	    }
	    if (DisableNMIDelivery(&APIC_ERR_REG(vm->hostAPIC))) {
	       nmi |= NMI_ERR;
	    }
	    break;
      }
   }
   return nmi;
}

/*
 * DisableNMI --
 *
 *      Mask NMI delivery in the host local APIC by running DisableNMI2()
 *      twice.
 *
 * Results:
 *      Union of the NMI_* flags reported by both passes; pass it to
 *      RestoreNMI() to undo the masking.
 */
static unsigned int
DisableNMI(VMDriver *vm)   // IN
{
   const unsigned int firstPass = DisableNMI2(vm);

   /*
    * On P4 the PC interrupt delivery is disabled when the timer triggers,
    * but the actual NMI delivery can race with us and the NMI handler can
    * re-enable it underneath us.  Hence the second pass, hoping for the
    * best...
    */
   const unsigned int secondPass = DisableNMI2(vm);

   return firstPass | secondPass;
}


/*
 * RestoreNMI --
 *
 *      Undo DisableNMI(): for every NMI_* flag set in 'nmi', clear
 *      APIC_LVT_MASK in the corresponding host local APIC LVT register.
 *      Registers whose flag is clear are not touched.
 */
static void
RestoreNMI(VMDriver *vm,  // IN
           unsigned int nmi) // IN
{
   /* Read-modify-write of one LVT register, only if we masked it earlier. */
#define UNMASK_IF_FLAGGED(flag, lvtReg)                         \
   do {                                                         \
      if (nmi & flag) {                                         \
         const uint32 current = lvtReg;                         \
                                                                \
         lvtReg = current & ~APIC_LVT_MASK;                     \
      }                                                         \
   } while (0)

   UNMASK_IF_FLAGGED(NMI_LINT0, APIC_LINT0_REG(vm->hostAPIC));
   UNMASK_IF_FLAGGED(NMI_LINT1, APIC_LINT1_REG(vm->hostAPIC));
   UNMASK_IF_FLAGGED(NMI_TIMER, APIC_TIMER_REG(vm->hostAPIC));
   UNMASK_IF_FLAGGED(NMI_ERR,   APIC_ERR_REG(vm->hostAPIC));
   UNMASK_IF_FLAGGED(NMI_PC,    APIC_PC_REG(vm->hostAPIC));
   UNMASK_IF_FLAGGED(NMI_THERM, APIC_THERM_REG(vm->hostAPIC));

#undef UNMASK_IF_FLAGGED
}


/*
 *-----------------------------------------------------------------------------
 *
 * TaskSaveDebugRegisters --
 *
 *      Save debug registers in the host context area of the crosspage.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      crosspage->hostDR[*] = filled with debug register contents
 *                hostDRInHW = all bits set indicating hardware DR contents 
 *                             currently match what the host wants
 *               hostDRSaved = bit set for those we wrote to hostDR[*] array
 *
 *-----------------------------------------------------------------------------
 */

static INLINE_SINGLE_CALLER void
TaskSaveDebugRegisters(VMCrossPageTOT *crosspage)
{
   uintptr_t drReg;  // scratch used by SAVE_DR

   /*
    * Save DR7 since we need to disable debug breakpoints during the
    * world switch code.  Save DR6 in order to accommodate the ICEBP
    * instruction.  All other debug registers are saved lazily by the
    * monitor prior to first use.  NOTE: Since monitor is in legacy
    * mode save all DRs for x86_64 here.
    */

   /* Bit n of these masks refers to DRn: 0xC0 = DR6|DR7, 0xFF = all. */
   crosspage->hostDRSaved = 0xC0;  // we always save DR6,DR7
   crosspage->hostDRInHW  = 0xFF;

   /* Read DRn into drReg and store it in crosspage->hostDR[n]. */
#define SAVE_DR(n)                             \
           GET_DR##n(drReg);                   \
           crosspage->hostDR[n] = drReg

   /* DR0..DR3 are saved eagerly only for a long-mode host with a 32-bit monitor. */
   if (TaskInLongMode() && !crosspage->runVmm64) {

      /*
       * The host might be using the top 32 bits of the debug registers but the 
       * monitor is only a 32-bit monitor, so it might just wipe out the top 
       * halves of DR0..DR3.  So we must save them before letting the monitor 
       * touch them.
       */

      /*
       * If we're in compatibility mode, we still have to save the top 32 bits 
       * of the debug registers.  So shift into 64-bit mode temporarily to save 
       * them.  
       */
      if (TaskInCompatMode()) {
#if defined(__GNUC__) && !defined(VM_X86_64)
         /*
          * Far call through KERNEL64_CS to TaskCM_SaveDebugRegisters64 with
          * ecx = crosspage->hostDR; eax, edx, flags and memory are declared
          * clobbered.
          */
         asm volatile ("lcall %1,$TaskCM_SaveDebugRegisters64"
            :
            : "c" (crosspage->hostDR), 
              "i" (KERNEL64_CS)
            : "eax", "edx", "cc", "memory");
#else
         // Windows never runs in compatibility mode
         TS_ASSERT(0);
#endif
      } else {
         /*
          * Either in legacy or 64-bit mode, just do a normal save.
          */
         SAVE_DR(0);
         SAVE_DR(1);
         SAVE_DR(2);
         SAVE_DR(3);
      }
      crosspage->hostDRSaved = 0xCF;  // DR0..3,6,7 are being saved
   }

   /*
    * These are always just 32 bits wide so it doesn't matter if the top halves 
    * get saved or not.
    */
   SAVE_DR(6);
   SAVE_DR(7);
#undef SAVE_DR
}


/*
 *-----------------------------------------------------------------------------
 *
 * TaskRestoreDebugRegisters --
 *
 *      Put the debug registers back the way they were when 
 *      TaskSaveDebugRegisters was called.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      Debug registers restored from values saved in the crosspage.
 *
 *-----------------------------------------------------------------------------
 */

static INLINE_SINGLE_CALLER void
TaskRestoreDebugRegisters(VMCrossPageTOT *crosspage)
{
   uintptr_t drReg;  // scratch used by RESTORE_DR and the DR7_GD path

   /*
    * Reload DRn from crosspage->hostDR[n], but only when bit n of
    * hostDRInHW is clear, i.e. the hardware register is no longer flagged
    * as holding the host's value.
    */
#define RESTORE_DR(n) if ((crosspage->hostDRInHW & (1 << n)) == 0) {  \
                          drReg = (uintptr_t)(crosspage->hostDR[n]);  \
                          SET_DR##n(drReg);                           \
                      }

   if (TaskInCompatMode()) {
#if defined(__GNUC__) && !defined(VM_X86_64)
      uint32 edxGetsWiped;

      /*
       * Far call through KERNEL64_CS to TaskCM_RestoreDebugRegisters64 with
       * ecx = crosspage->hostDR and edx = crosspage->hostDRInHW.  edx is
       * declared as an output, and eax as clobbered, because the callee
       * does not preserve them.
       */
      asm volatile ("lcall %3,$TaskCM_RestoreDebugRegisters64"
         : "=d" (edxGetsWiped)
         : "c" (crosspage->hostDR), 
           "0" (crosspage->hostDRInHW), 
           "i" (KERNEL64_CS)
         : "eax", "cc", "memory");
#else
      // Windows never runs in compatibility mode
      TS_ASSERT(0);
#endif
   } else {
      RESTORE_DR(0);
      RESTORE_DR(1);
      RESTORE_DR(2);
      RESTORE_DR(3);
   }

   /* DR6/DR7 are restored the normal way in every mode. */
   RESTORE_DR(6);
   RESTORE_DR(7);

   /*
    * One-shot request stored in the crosspage: clear the flag and write
    * the saved DR7 value back with DR7_GD set.
    */
   if (UNLIKELY(crosspage->restoreGeneralDetect)) {
      crosspage->restoreGeneralDetect = 0;
      drReg = (uintptr_t)(crosspage->hostDR[7]) | DR7_GD;
      SET_DR7(drReg);
   }

#undef RESTORE_DR
}


/*
 *-----------------------------------------------------------------------------
 *
 * TaskUpdatePTSCParameters --
 *     
 *      Mac only: if the PTSC is behind where it should be, based on the host's
 *      uptime, then adjust the PTSC parameters.  PR 118376.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      May update the PTSC parameters.
 *
 *-----------------------------------------------------------------------------
 */

static INLINE_SINGLE_CALLER void
TaskUpdatePTSCParameters(VMCrossPageTOT *crosspage)
{
   /* The whole body is compiled only when __APPLE__ is defined. */
#ifdef __APPLE__
   static VmTimeStart startTime;  // reference point: TSC count + host uptime
   static uint64      freq;       // value of HostIF_UptimeFrequency()
   static uint32      scale;      // fixed-point factor with SHIFT fraction bits
   const int          SHIFT = 20;

   ASSERT_NO_INTERRUPTS();
   if (UNLIKELY(RateConv_IsIdentity(&crosspage->pseudoTSCConv))) {
      /*
       * First pass: take the reference point and switch the conversion to
       * mult = 2 / shift = 1.  The else branch asserts exactly these
       * values, so later calls are expected to take that branch.
       */
      startTime.count = RDTSC();
      startTime.time  = HostIF_ReadUptime();
      TS_ASSERT(crosspage->pseudoTSCConv.add == 0);
      crosspage->pseudoTSCConv.mult  = 2;
      crosspage->pseudoTSCConv.shift = 1;
      freq  = HostIF_UptimeFrequency(); 
      /*
       * NOTE(review): presumably TSC ticks per uptime tick, scaled by
       * 2^SHIFT (kHz estimate * 1000 / uptime frequency) -- confirm units.
       */
      scale = 1000 * ((uint64)Vmx86_GetkHzEstimate(&startTime) << SHIFT) / freq;
   } else {
      uint64 TSC;
      uint64 uptime;
      uint64 uptimeDelta;
      int64  PTSCDelta;
      uint64 expectedPTSC;

      TS_ASSERT(crosspage->pseudoTSCConv.mult  == 2 &&
                crosspage->pseudoTSCConv.shift == 1);

      TSC          = RDTSC();                             
      uptime       = HostIF_ReadUptime();
      uptimeDelta  = uptime - startTime.time;
      /* Where the PTSC should be, extrapolated from uptime since startTime. */
      expectedPTSC = (uptimeDelta * scale >> SHIFT) + startTime.count;
      PTSCDelta    = expectedPTSC - TSC;
      /* Only ever raise the offset; it is never lowered here. */
      if (crosspage->pseudoTSCConv.add < PTSCDelta) {
         crosspage->pseudoTSCConv.add = PTSCDelta;
      }
      if (HIDWORD(uptimeDelta) > 100) {
         /* Reset startTime so that uptimeDelta * scale can't overflow. */
         uint64 PTSC = TSC + crosspage->pseudoTSCConv.add;
         /* It might be nice to assert that the calculation of expectedPTSC
            didn't overflow.  But, for F&F, this ASSERT is of dubious value.  */
         // TS_ASSERT(MAX_UINT64 / uptimeDelta > scale);

         /* Set startTime s.t. uptimeDelta of 0 corresponds to current PTSC. */
         startTime.time  = uptime;
         startTime.count = PTSC;
      }
   }
#endif
}


/*
 *-----------------------------------------------------------------------------
 *
 * TaskCMHostSwitchToMonitor32/64 --
 *
 *      Switch from compatibility mode host to 32-bit or 64-bit monitor.
 *
 *      The Mac is in compatibility mode at this point.  So we'll switch it to 
 *      true 64-bit mode as we call the worldswitch code.  This enables the 
 *      worldswitch code to save 64-bit register contents.
 *
 * Results:
 *      None.
 *
 * Side effects:
 *      The monitor does many things.
 *
 *-----------------------------------------------------------------------------
 */

/*
 * Compatibility-mode host -> 32-bit monitor.  Computes the address of the
 * hostToVmm32 entry inside the crosspage's worldswitch module and reaches
 * it via a far call to TaskCM_CallWS32 through KERNEL64_CS.
 */
static INLINE_SINGLE_CALLER void
TaskCMHostSwitchToMonitor32(VMCrossPageTOT *crosspage)
{
#if defined(__GNUC__) && !defined(VM_X86_64)
   uint8 *addr = crosspage->wsModule + wsMod(crosspage)->hostToVmm32;
   uint32 eaxGetsWiped, ecxGetsWiped;

   /*
    * Inputs: eax = worldswitch entry address, ecx = crosspage pointer.
    * Both are also declared as outputs, and edx as clobbered, since their
    * values are not preserved across the call.
    */
   asm volatile ("lcall %4,$TaskCM_CallWS32"
         : "=a" (eaxGetsWiped), 
           "=c" (ecxGetsWiped)
         : "0" (addr), 
           "1" (crosspage), 
           "i" (KERNEL64_CS)
         : "edx", "cc", "memory");
#else
   // Windows never runs in compatibility mode
   TS_ASSERT(0);
#endif
}

/*
 * Compatibility-mode host -> 64-bit monitor.  Computes the address of the
 * hostToVmm64 entry inside the crosspage's worldswitch module and reaches
 * it via a far call to TaskCM_CallWS64 through KERNEL64_CS.
 */
static INLINE_SINGLE_CALLER void
TaskCMHostSwitchToMonitor64(VMCrossPageTOT *crosspage)
{
#if defined(__GNUC__) && !defined(VM_X86_64)
   uint8 *worldSwitch = crosspage->wsModule + wsMod(crosspage)->hostToVmm64;
   /* This path is built for 32-bit only, so the addresses fit in uint32. */
   uint32 hostContext = (uint32)&crosspage->hostContext64;
   uint32 monContext  = (uint32)&crosspage->monContext64;
   uint32 eaxGetsWiped, ecxGetsWiped, edxGetsWiped;

   /*
    * Inputs: eax = worldswitch entry address, ecx = monitor context,
    * edx = host context.  All three are also declared as outputs because
    * their values are not preserved across the call.
    */
   asm volatile ("lcall %6,$TaskCM_CallWS64"
         : "=a" (eaxGetsWiped), 
           "=c" (ecxGetsWiped), 
           "=d" (edxGetsWiped)
         : "0" (worldSwitch), 
           "1" (monContext), 
           "2" (hostContext), 
           "i" (KERNEL64_CS)
         : "cc", "memory");
#else
   // Windows never runs in compatibility mode
   TS_ASSERT(0);
#endif
}

/*
 * Compatibility-mode host: dispatch to the 32-bit or 64-bit monitor entry
 * according to crosspage->runVmm64.
 */
static INLINE_SINGLE_CALLER void
TaskCMHostSwitchToMonitor(VMCrossPageTOT *crosspage)
{
   if (!crosspage->runVmm64) {
      TaskCMHostSwitchToMonitor32(crosspage);
      return;
   }

   TaskCMHostSwitchToMonitor64(crosspage);
}

#ifdef VM_X86_64
/*
 * From an email with Petr regarding gcc's handling of the stdcall
 * attribute for x86-64:
 *
 *    As far as I can tell, for x86_64 there is only one calling
 *    convention:
 *       On Linux rdi/rsi/rdx/rcx/r8d/r9d for <= 6 arguments,
 *       others always on stack, caller always adjusts stack.
 *
 *       On Windows it is rcx/rdx/r8d/r9d for <= 4 arguments, rest on
 *       stack.  When more than 4 arguments are passed, spill space is
 *       reserved on the stack for the register arguments.  Argument
 *       5 is accessed at (5 * 8)(rsp).
 */

/*
 *-----------------------------------------------------------------------------
 *
 * SwitchToMonitor --
 *
 *      Wrapper that calls code to switch from the host to the monitor.
 *
 * Side effects:
 *      None for the module
 *
 *-----------------------------------------------------------------------------
 */

template <class VMCrossPage>
static TEMPLATE_INLINE_SINGLE_CALLER void
SwitchToMonitor32_TOT(VMCrossPage *crosspage)
{
   /*
    * 64-bit host -> 32-bit monitor.  The worldswitch code takes the
    * crosspage pointer in rcx under both compilers: with GCC three zero
    * placeholder arguments occupy rdi/rsi/rdx so the pointer becomes the
    * fourth argument; with MSVC it is simply the first argument.
    */
#if defined(__GNUC__)
#define PROTOPARMS32  uint32       zero0   /* rdi */,   \
                      uint32       zero1   /* rsi */,   \
                      uint32       zero2   /* rdx */,   \
                      VMCrossPage *cp      /* rcx */
#define PARMS32 0 /* rdi */, 0 /* rsi */, 0 /* rdx */, crosspage /* rcx */
#elif defined(_MSC_VER)
#define PROTOPARMS32 VMCrossPage *cp /* rcx */
#define PARMS32 crosspage /* rcx */
#else
#error No compiler defined for 64-bit SwitchToMonitor32
#endif
   typedef void (*SwitchFn)(PROTOPARMS32);
   /* Entry point = worldswitch module base + hostToVmm32 offset. */
   const uint8    *addr        = crosspage->wsModule
                               + wsMod(crosspage)->hostToVmm32;
   const SwitchFn  worldSwitch = (SwitchFn)addr;
   worldSwitch(PARMS32);
#undef PROTOPARMS32
#undef PARMS32
}

template <class VMCrossPage>
static TEMPLATE_INLINE_SINGLE_CALLER void
SwitchToMonitor64_TOT(VMCrossPage *crosspage)
{
   /*
    * 64-bit host -> 64-bit monitor.  SWITCHFN64 orders the arguments so
    * that, under either compiler, the worldswitch code sees src in rdx,
    * dest in rcx and the host CS selector in r8d.
    */
#if defined(__GNUC__)
   typedef void (*SwitchFn)(uint32   zero0  /* rdi */,
                            uint32   zero1  /* rsi */,
                            uint64   src    /* rdx */,
                            uint64   dest   /* rcx */,
                            Selector hostCS /* r8d */);
#define SWITCHFN64(_fn, _src, _dest, _hostCS)           \
               _fn(0       /* rdi */, 0     /* rsi */,  \
                   _src    /* rdx */, _dest /* rcx */,  \
                   _hostCS /* r8d */)
#elif defined(_MSC_VER)
   typedef void (__cdecl *SwitchFn)(uint64   dest   /* rcx */,
                                    uint64   src    /* rdx */,
                                    Selector hostCS /* r8d */);
#define SWITCHFN64(_fn, _src, _dest, _hostCS)           \
               _fn(_dest   /* rcx */, _src /* rdx */,   \
                   _hostCS /* r8d */)
#else
#error No compiler defined for 64-bit SwitchToMonitor64
#endif

   /* Entry point = worldswitch module base + hostToVmm64 offset. */
   const uint8    *worldSwitch = crosspage->wsModule
                               + wsMod(crosspage)->hostToVmm64;
   const SwitchFn  fn          = (SwitchFn)worldSwitch;
   const uint64    hostContext = (uint64)&crosspage->hostContext64;
   const uint64    monContext  = (uint64)&crosspage->monContext64;
   const Selector  hostCS      = (Selector)crosspage->hostContext64.context.cs;

   /* src = host context, dest = monitor context. */
   SWITCHFN64(fn, hostContext, monContext, hostCS);
#undef SWITCHFN64
}

#else   /* VM_X86_64 */

/*
 *-----------------------------------------------------------------------------
 *
 * SwitchToMonitor --
 *
 *      Wrapper that calls code to switch from the host to the monitor.
 *
 * Side effects:
 *      None for the module
 *
 *-----------------------------------------------------------------------------
 */
template <class VMCrossPage>
static TEMPLATE_INLINE_SINGLE_CALLER void
SwitchToMonitor32_TOT(VMCrossPage *crosspage)
{
   /*
    * 32-bit host -> 32-bit monitor.  The worldswitch entry is called
    * through a stdcall function pointer (regparm(0) under GCC).
    */
#if defined(__GNUC__)
#define SWITCHFN (__attribute__((stdcall, regparm(0))) *SwitchFn)
#elif defined(_MSC_VER)
#define SWITCHFN (__stdcall *SwitchFn)   
#else
#error No compiler defined for 32-bit SwitchToMonitor32
#endif
   typedef void SWITCHFN(Selector     cs,
                         ContextInfo *src,
                         ContextInfo *dst,
                         uint32       newDstVA);

   ContextInfo    *src     = &(crosspage->hostContext);
   ContextInfo    *dst     = &(crosspage->monContext);
   /* Entry point = worldswitch module base + hostToVmm32 offset. */
   const uint8    *codePtr = crosspage->wsModule
                           + wsMod(crosspage)->hostToVmm32;
   const Selector  hostCS  = crosspage->hostContext.task.cs;
    /*
    * newDstVA and vm->crosspage->monContext point to the same
    * location.  The latter refers to the monitor's virtual address
    * space.
    */
   uint32 newDstVA  = (uint32)(VPN_2_VA(MODULECALL_CROSS_PAGE_START) +
                               offsetof(VMCrossPage, monContext));    
   SwitchFn fn = (SwitchFn)codePtr;
   fn(hostCS, src, dst, newDstVA);
#undef SWITCHFN
}

template <class VMCrossPage>
static TEMPLATE_INLINE_SINGLE_CALLER void
SwitchToMonitor64_TOT(VMCrossPage *crosspage)
{
   /*
    * 32-bit host -> 64-bit monitor.  The worldswitch entry is called
    * through a stdcall function pointer (regparm(0) under GCC) and
    * receives the host CS selector plus the crosspage pointer.
    */
#if defined(__GNUC__)
#define SWITCHFN (__attribute__((stdcall, regparm(0))) *SwitchFn)
#elif defined(_MSC_VER)
#define SWITCHFN (__stdcall *SwitchFn)   
#else
#error No compiler defined for 32-bit SwitchToMonitor64
#endif
   typedef void SWITCHFN(Selector     cs,
                         VMCrossPage *crosspage);

   /* Entry point = worldswitch module base + hostToVmm64 offset. */
   const uint8    *codePtr = crosspage->wsModule
                           + wsMod(crosspage)->hostToVmm64;
   const Selector  hostCS  = crosspage->hostContext.task.cs;
   SwitchFn fn = (SwitchFn)codePtr;
   fn(hostCS, crosspage);
#undef SWITCHFN
}

#endif // VM_X86_64

/*
 * Dispatch to the 32-bit or 64-bit monitor entry wrapper according to
 * crosspage->runVmm64.
 */
template <class VMCrossPage>
static TEMPLATE_INLINE_SINGLE_CALLER void
SwitchToMonitor_TOT(VMCrossPage *crosspage)
{
   if (!crosspage->runVmm64) {
      SwitchToMonitor32_TOT(crosspage);
      return;
   }

   SwitchToMonitor64_TOT(crosspage);
}


/*
 *-----------------------------------------------------------------------------
 *
 * Task_Switch --
 *
 *      Switches from the host context into the monitor
 *      context. Think of it as a coroutine switch that changes
 *      not only the registers, but also the address space
 *      and all the hardware state.
 *
 * Results:
 *      Next module call (or user call for that matter) is
 *      returned.
 *
 * Side effects:
 *      Jump to the other side. Has no direct effect on the
 *      host-visible state except that it might generate an interrupt.
 *
 *-----------------------------------------------------------------------------
 */

void 
Task_Switch_TOT(VMDriver *vm,  // IN
                Vcpuid vcpuid) // IN
{
   uintptr_t   flags, cr0reg, cr2reg, cr4reg, new_cr4;
   /* 64-bit segment bases; only read and restored when TaskInLongMode(). */
   uint64      fs64  = 0;
   uint64      gs64  = 0;
   uint64      kgs64 = 0;
   DTR64       hostGDT64;
   Selector    cs, gs, fs;
   Selector    trReg;
   Selector    hostLDT;
   unsigned    lint;      // NMI_* flags returned by DisableNMI()
   VMCrossPageTOT *crosspage = (VMCrossPageTOT *)vm->crosspage[vcpuid];
   
#if defined(linux) && defined(USE_PERFCTRS_HOSTED)
   PerfCtr_SnapshotFromHost();
#endif
   /*
    * Mask NMI-delivering LVT entries first, then disable interrupts.  Both
    * are undone, in reverse order, at the end of this function.
    */
   lint = DisableNMI(vm);
   SAVE_FLAGS(flags); 
   CLEAR_INTERRUPTS();

   /* Also used below as the index of this CPU's temporary GDT. */
   vm->currentHostCpu[vcpuid] = HostIF_GetCurrentPCPU();
   
   /*
    * Save CR state (not CR3!)
    */
   GET_CR0(cr0reg);
   GET_CR2(cr2reg);
   GET_CR4(cr4reg);

   /* Ensure global pages are flushed */
   new_cr4 = cr4reg & ~CR4_PGE;
   SET_CR4(new_cr4);

   /* The crosspage gets the PGE-cleared value; cr4reg is restored on return. */
   crosspage->hostCR4 = new_cr4;

   TaskSaveDebugRegisters(crosspage);

   /*
    * Try to disable debug exceptions during the switch.
    * Unfortunately we cannot do this reliably, as the host
    * may have set DR7_GD.  This will cause the SET_DR7 to
    * trap, and the host trap handler can then put whatever
    * it wants in DR7 and resume after the SET_DR7.  We fix
    * this in the monitor, with our own trap handler.
    */

   if (UNLIKELY(crosspage->hostDR[7] & DR7_ENABLED)) {
      SET_DR7(DR7_DEFAULT);
   }

   TaskUpdatePTSCParameters(crosspage);

   /* Read the host GDTR into hostGDT64. */
   if (TaskInCompatMode()) {
#if defined(__GNUC__) && !defined(VM_X86_64)
      /* eax = &hostGDT64, then far call to TaskCM_SaveGDT64 via KERNEL64_CS. */
      asm volatile ("leal  %0,%%eax\n"
                    "lcall %1,$TaskCM_SaveGDT64"
         : "=m" (hostGDT64)
         : "i" (KERNEL64_CS)
         : "eax", "cc", "memory");
#else
      // Windows never runs in compatibility mode
      TS_ASSERT(0);
      hostGDT64.limit  = 0;
      hostGDT64.offset = 0;
#endif
   } else {
      /*
       * NOTE(review): offset is zeroed first, presumably because _Get_GDT
       * is handed a DTR * and may fill only part of the DTR64 -- confirm.
       */
      hostGDT64.offset = 0;
      _Get_GDT((DTR *)&hostGDT64);
   }

   /*
    * GS and FS are saved outside of the SwitchToMonitor() code to 1)
    * minimize the amount of code handled there and 2) prevent us from
    * faulting if they happen to be in the LDT (since the LDT is saved and
    * restored here too) and 3) make sure that if we do fault (because the
    * uncached descriptor for GS or FS's Selector has become invalid) we
    * fault in the host's context rather than the monitor or, worse,
    * never-never land. --Jeremy.
    */

   cs = GET_CS();
   gs = GET_GS();
   fs = GET_FS();
   GET_LDT(hostLDT);
   GET_TR(trReg);

   /* Record CS/LDT/TR in the host context that matches the host mode. */
   if (TaskInLongMode()) {
      kgs64 = GET_KernelGS64();
      gs64  = GET_GS64();
      fs64  = GET_FS64();
      crosspage->hostContext64.context.cs  = cs;
      crosspage->hostContext64.context.ldt = hostLDT;
      crosspage->hostContext64.tr          = trReg;
   } else {
      crosspage->hostContext.task.cs  = cs;
      crosspage->hostContext.task.ldt = hostLDT;
      crosspage->hostContext.tr       = trReg;
   }
   
   /* Fills in the crosspage's host GDTR (host GDT or temporary copy). */
   SetupTemporaryGDT_TOT(vm->currentHostCpu[vcpuid], crosspage, hostGDT64,
                         hostLDT, cs, trReg);

   /* To return to the task, mark it as unused. */
   if (trReg) {
      TS_ASSERT((trReg & 7) == 0);
      if (TaskInCompatMode()) {
#if defined(__GNUC__) && !defined(VM_X86_64)
         uint32 eaxGetsWiped;

         /* eax = TR selector, ecx = &hostContext64.gdtr; eax is not preserved. */
         asm volatile ("lcall %3,$TaskCM_ClearTREntry64"
            : "=a" (eaxGetsWiped)
            : "c" (&(crosspage->hostContext64.gdtr)), 
              "0" ((uint32)trReg), 
              "i" (KERNEL64_CS)
            : "cc", "memory");
#else
         // Windows never runs in compatibility mode
         TS_ASSERT(0);
#endif
      } else {
         Descriptor *desc;
         /*
          * NOTE(review): the context is chosen with vm_x86_64 here, while
          * the code that filled in gdtr (SetupTemporaryGDT_TOT) chooses
          * with TaskInLongMode().  The two are the same expression only
          * when __APPLE__ is not defined -- confirm they agree on Mac
          * hosts.
          */
         if (vm_x86_64) {
            desc = (Descriptor *)(VA)
               (HOST_KERNEL_LA_2_VA(crosspage->hostContext64.gdtr.dtr.offset) 
               + trReg);
         } else {
            desc = (Descriptor *)(VA)
               (HOST_KERNEL_LA_2_VA(crosspage->hostContext.gdtr.dtr.offset) 
               + trReg);
         }
         /* Busy TSS -> available TSS, in the GDT recorded in the crosspage. */
         if (Desc_Type(desc) == TASK_DESC_BUSY) {
            Desc_SetType(desc, TASK_DESC);
         }
      }
   }

   /* The actual world switch; the code below runs after it returns. */
   if (TaskInCompatMode()) {
      TaskCMHostSwitchToMonitor(crosspage);
   } else {
      SwitchToMonitor_TOT(crosspage);
   }

   /* Put back the control registers saved above (cr4reg still has PGE as found). */
   SET_CR0(cr0reg);
   SET_CR2(cr2reg);
   SET_CR4(cr4reg);
   /* The monitor shouldn't modify CR8 */

   if (USE_TEMPORARY_GDT) {
      /* When enabled, vmmon is reentered on the temporary GDT. */
      _Set_GDT((DTR *)&hostGDT64);
   }

   /* restore fs/gs must come before 64 bit fs/gs restore */
   SET_FS(fs);
   SET_GS(gs);
   if (TaskInLongMode()) {
      SET_FS64(fs64);
      SET_GS64(gs64);
      SET_KernelGS64(kgs64);
   }

   TaskRestoreDebugRegisters(crosspage);

   ASSERT_NO_INTERRUPTS();

   /*
    * The monitor came back with an interrupt to forward: args[0] holds the
    * vector, which is re-raised on the host below.
    */
   if (crosspage->moduleCallType == MODULECALL_TOT_INTR) {
      /*
       * Note we must do the RAISE_INTERRUPT before ever enabling
       * interrupts or bad things have happened (might want to know exactly
       * what bad things btw).
       * Note2: RAISE_INTERRUPT() only takes an constant and hence with switch
       * statement.
       */

      /*
       * IRQ_INT(v) emits one case label for vector v; IRQ_INTn(v) expands
       * to n consecutive vectors starting at v.  These macros are not
       * #undef'd afterwards.
       */
#define IRQ_INT(_x) case _x: RAISE_INTERRUPT(_x); break 
#define IRQ_INT2(_x) IRQ_INT(_x); IRQ_INT(_x + 1)
#define IRQ_INT4(_x) IRQ_INT2(_x); IRQ_INT2(_x + 2)
#define IRQ_INT8(_x) IRQ_INT4(_x); IRQ_INT4(_x + 4)
#define IRQ_INT16(_x) IRQ_INT8(_x); IRQ_INT8(_x + 8)
#define IRQ_INT32(_x) IRQ_INT16(_x); IRQ_INT16(_x + 16)

      switch (crosspage->args[0]) {
	 // These are the general IO interrupts
	 // It would be nice to generate this dynamically, but see Note2 above.

	 /*
	  * Pass Machine Check Exception (Interrupt 0x12) to the host.
	  * See bug #45286 for details.
	  */
	 IRQ_INT(0x12);

         /*
          * pass the reserved vectors (20-31) as well. amd64 windows
          * generates these.
          */
	 IRQ_INT8(0x14);
	 IRQ_INT4(0x1c);

	 /* Vectors 0x20 through 0xff. */
	 IRQ_INT32(0x20);
	 IRQ_INT32(0x40);
	 IRQ_INT32(0x60);
	 IRQ_INT32(0x80);
	 IRQ_INT32(0xa0);
	 IRQ_INT32(0xc0);
	 IRQ_INT32(0xe0);

      default: 
	 /*
	  * XXXX nt
	  * running on a 2 processor machine we hit this Panic with int 0xD1 0x61 ...
	  */
	 Warning("Received Unexpected Interrupt: 0x%X in Task_Switch()\n", crosspage->args[0]);
	 Panic("Received Unexpected Interrupt: 0x%X\n", crosspage->args[0]);
      }
   }
   
   /* This VCPU is no longer tied to a host CPU. */
   vm->currentHostCpu[vcpuid] = INVALID_HOST_CPU;

   /* Undo the interrupt and NMI masking done on entry, in reverse order. */
   RESTORE_FLAGS(flags);
   RestoreNMI(vm, lint);
#if defined(linux) && defined(USE_PERFCTRS_HOSTED)
   PerfCtr_SnapshotFromMonitor();
#endif
}


#include "task_compat.h"
