/* **********************************************************
 * Copyright 1998 VMware, Inc.  All rights reserved. -- VMware Confidential
 * **********************************************************/


/*
 * modulecall.h
 *
 *        Monitor <-->  Module (kernel driver) interface
 */

#ifndef _MODULECALL_H
#define _MODULECALL_H

#define INCLUDE_ALLOW_VMCORE
#define INCLUDE_ALLOW_VMMON
#include "includeCheck.h"

#include "x86.h"
#include "vm_time.h"
#include "vcpuid.h"
#include "vcpuset.h"
#include "vmm_constants.h"
#include "contextinfo.h"
#include "x86types.h"

/*
 *----------------------------------------------------------------------
 *
 * ModuleCallType --
 *
 *      Enumeration of support calls done by the module
 *
 *----------------------------------------------------------------------
 */

/*
 * Values are spelled out explicitly because they form part of the
 * monitor <--> module interface; numbering starts at 100.
 */
typedef enum ModuleCallTypeTOT {
   MODULECALL_TOT_NONE              = 100,

   /* Requests serviced by the module. */
   MODULECALL_TOT_INTR              = 101,
   MODULECALL_TOT_SEMAWAIT          = 102,
   MODULECALL_TOT_SEMASIGNAL        = 103,
   MODULECALL_TOT_SEMAFORCEWAKEUP   = 104,
   MODULECALL_TOT_IPI               = 105,   /* hit thread with IPI */
   MODULECALL_TOT_SWITCH_TO_PEER    = 106,

   /* Return codes for user calls. */
   MODULECALL_TOT_USERRETURN        = 107,
   MODULECALL_TOT_USERTIMEOUT       = 108,

   /* Page management requests. */
   MODULECALL_TOT_GET_RECYCLED_PAGE = 109,
   MODULECALL_TOT_RELEASE_ANON_PAGE = 110,
   MODULECALL_TOT_IS_ANON_PAGE      = 111,

   /*
    * Sentinel: one past the highest valid call type.  Must remain the
    * final enumerator; left implicit so it follows any new entry.
    */
   MODULECALL_TOT_LAST
} ModuleCallTypeTOT;


/*
 * NOTE(review): presumably the inclusive range of call numbers reserved
 * for user calls -- confirm against the code that dispatches them.
 */
#define MODULECALL_USER_START 300
#define MODULECALL_USER_END   399

/*
 * NOTE(review): presumably the length (in pages) and the starting page
 * number of the crosspage -- confirm.  The start value equals what
 * VMCrossPageTOT::crosspageStart() returns to C++ callers.
 */
#define MODULECALL_CROSS_PAGE_LEN    1
#define MODULECALL_CROSS_PAGE_START  6

// we don't want it in the final version
//#define USE_BLUE_SCREEN

/*
 * Same numeric value as MODULECALL_USER_START.
 * NOTE(review): presumably the "no user call requested" value for the
 * crosspage's userCallType field -- confirm.
 */
#define MODULECALL_USERCALL_NONE     300

/*
 * Define VMX86_UCCOST in the makefiles (Local.mk,
 * typically) if you want a special build whose only purpose
 * is to measure the overhead of a user call and its
 * breakdown.
 *
 * WINDOWS NOTE: I don't know how to pass VMX86_UCCOST to
 * the driver build on Windows.  It must be defined by hand.
 *
 * ESX Note: we don't have a crosspage in which to store these
 * timestamps.  Such a feature would perhaps be nice (if we
 * ever tire of the argument that esx does so few usercalls
 * that speed doesn't matter).
 */

/*
 * UCTIMESTAMP(cp, stamp) --
 *
 *      In VMX86_UCCOST builds (non-server only), store the current
 *      RDTSC() value in cp->ucTimeStamps[UCCOST_<stamp>].  In every
 *      other build it is a no-op.
 *
 *      Both variants expand to a single do { } while (0) statement, so
 *      a call site must be written as "UCTIMESTAMP(cp, X);" and then
 *      parses identically in both build flavours (including as the
 *      unbraced body of an if/else).  The no-op variant does not
 *      evaluate its arguments.
 */
#if defined(VMX86_UCCOST) && !defined(VMX86_SERVER)
#define UCTIMESTAMP(cp, stamp) \
             do { (cp)->ucTimeStamps[UCCOST_ ## stamp] = RDTSC(); } while (0)
#else
#define UCTIMESTAMP(cp, stamp) \
             do { } while (0)
#endif

/*
 * UCCostResults --
 *
 *      Three 32-bit counters reporting user call cost.  The field set
 *      depends on the build: vmksti/vmkcli/ucnop when VMX86_SERVER is
 *      defined, htom/mtoh/ucnop otherwise.
 *
 *      NOTE(review): the units and exact meaning of each counter are
 *      not visible in this header -- presumably cycle counts; confirm.
 */
#ifdef VMX86_SERVER
typedef struct UCCostResults {
   uint32 vmksti;
   uint32 vmkcli;
   uint32 ucnop;
} UCCostResults;
#else

typedef struct UCCostResults {
   uint32 htom;
   uint32 mtoh;
   uint32 ucnop;
} UCCostResults;

/*
 * UCCostStamp --
 *
 *      One UCCOST_<name> enumerator per UC(name) entry listed in
 *      uccostTable.h, followed by UCCOST_MAX.  UCCOST_MAX is used to
 *      size VMCrossPageTOT.ucTimeStamps, and UCTIMESTAMP() indexes that
 *      array with these enumerators.
 *
 *      NOTE(review): UC() is not #undef'd here; presumably
 *      uccostTable.h takes care of that -- confirm.
 */
typedef enum UCCostStamp {
#define UC(x) UCCOST_ ## x,
#include "uccostTable.h"
   UCCOST_MAX
} UCCostStamp;
#endif // VMX86_SERVER

/*
 * LongModeSwitch --
 *
 *      Packed data block embedded in VMCrossPageTOT as lmSwitch, where
 *      it is tagged as used for the vmm32+host64 and vmm64+host32
 *      combinations.  The struct is packed, so field order and the
 *      explicit _pad* members define the exact byte layout; do not
 *      reorder or resize fields.
 *
 *      NOTE(review): the _pad* members presumably round each far
 *      pointer up to an 8-byte slot -- confirm against sizeof(FarPtr32).
 */
typedef struct LongModeSwitch {
   DTR64           switchGdtr64;     /* presumably describes switchGdt below -- confirm */
   uint16          switchDS;
   FarPtr32        jump32Code;       /* &worldswitch_64h_32v_mode_32compat */
   uint16          _pad;
   FarPtr32        jump64Code;       /* &worldswitch_64h_32v_mode_64 */
   uint16          _pad2;
   uint32          _pad3;
   uint64          farPtr;           /* &worldswitch_64h_32v_mon_switch */
   DescriptorUnion switchGdt[4];     /* four descriptor slots */
   uint64          hostSwitchCR3;
   uint64          switchCR3SwitchL4E;
   uint64          switchCR3SwitchL4EMA; /* NOTE(review): "MA" presumably = machine address -- confirm */
} __attribute__ ((packed)) LongModeSwitch;

/* Round _s up to the next multiple of 16. */
#define WS_ALIGN(_s)     (((_s) + 0xf) & ~0xf)
/* Larger of the two arguments (the selected argument is evaluated twice). */
#define WS_MAX(_a, _b)   (((_a) < (_b)) ? (_b) : (_a))

/*
 * Byte sizes of the loadable worldswitch code sections, each rounded up
 * to a 16-byte multiple.  Obtain the raw numbers by running
 * '$VMTREE/support/scripts/worldswitch_sections' once the monitor has
 * been built successfully.
 */
#define HWS_32H_32V   WS_ALIGN(0x7b)
#define HWS_64H_64V   WS_ALIGN(0xd0)
#define HWS_64H_32V   WS_ALIGN(0xb4)
#define VWS_64H_32V   WS_ALIGN(0xbe)
#define HWS_32H_64V   WS_ALIGN(0x120)
#define VWS_32H_64V   WS_ALIGN(0xf3)

/*
 * Each ContextSwitch buffer holds one of two alternative sections, so
 * it is sized for the larger of the pair.
 */
#define WS_CODE_STD   WS_MAX(HWS_32H_32V, HWS_64H_32V)
#define WS_CODE_VMM   WS_MAX(VWS_32H_64V, VWS_64H_32V)
#define WS_CODE_64    WS_MAX(HWS_32H_64V, HWS_64H_64V)

/*
 * ContextSwitch --
 *
 *      Three byte buffers for the loadable worldswitch code sections.
 *      Each buffer is sized (via WS_CODE_*) for the larger of the two
 *      sections it may hold.  Embedded in VMCrossPageTOT right after
 *      the 16-byte version/padding header.
 */
typedef struct ContextSwitch {
   uint8  hostXToVmm32[WS_CODE_STD]; /* hws_32h_32v -or- hws_64h_32v */
   uint8  vmmXToHostY[WS_CODE_VMM];  /* vws_32h_64v -or- vws_64h_32v */
   uint8  hostXToVmm64[WS_CODE_64];  /* hws_32h_64v -or- hws_64h_64v */
} ContextSwitch;

/*
 * SHADOW_DR*(cp, n) --
 *
 *      Lvalue accessors for shadow debug register n of the structure
 *      that cp points to.
 *
 *      Non-server builds: the registers live in cp->_shadowDR[] and
 *      can be viewed as 64-bit (SHADOW_DR64) or 32-bit (SHADOW_DR32);
 *      SHADOW_DR selects the 64-bit view when VMM64 is defined and the
 *      32-bit view otherwise.
 *
 *      Server builds: SHADOW_DR accesses cp->shadowDR[n] directly.
 *
 *      The cp argument is parenthesized so that any pointer-valued
 *      expression (e.g. "*pp" or "base + i") can be passed safely.
 */
#ifndef VMX86_SERVER

#define SHADOW_DR64(cp, n)    ((cp)->_shadowDR[n].ureg64)
#define SHADOW_DR32(cp, n)    ((cp)->_shadowDR[n].ureg32)
#ifdef VMM64
#define SHADOW_DR(cp, n) SHADOW_DR64(cp, n)
#else
#define SHADOW_DR(cp, n) SHADOW_DR32(cp, n)
#endif

#else  //VMX86_SERVER

#define SHADOW_DR(cp, n) ((cp)->shadowDR[n])

#endif  //VMX86_SERVER

/*
 *----------------------------------------------------------------------
 *
 * VMCrossPage --
 *
 *      data structure shared between the monitor and the module
 *      that is used for crossing between the two.
 *      Accessible as vm->cross (kernel module) and CROSS_PAGE
 *      (monitor)
 *
 *      Exactly one page long
 *
 *----------------------------------------------------------------------
 */

/*
 * The struct is packed and its size is hard-coded separately as
 * MODULECALL_CROSSPAGE_SIZE_TOT, so every field's position is part of
 * the interface: do not reorder, resize, add or remove fields without
 * updating that constant and CROSSPAGE_VERSION_TOT.
 *
 * NOTE(review): MODULECALL_CROSSPAGE_SIZE_TOT is 0x9f0 bytes, which is
 * smaller than a 4096-byte page -- so "exactly one page long"
 * presumably refers to the mapping rather than sizeof(); confirm.
 */
typedef struct VMCrossPageTOT {
   /*
    * Version checking. Should remain at offset 0
    */
   uint32 version;
   uint32 _pad[3]; /* Align context switching code on 16 byte boundary. */

   ContextSwitch contextSwitch;

   /* Pad to offset 1024 to appease the P4 trace cache.  Smaller than
    * 1024 causes the trace cache to be flushed each time data is
    * written to the context switch.
    *
    * The 16 below accounts for version plus _pad[3] above.
    */
   uint8 _trace_pad[1024 - (16 /* alignment */ + sizeof(ContextSwitch))];

   /*
    * The interrupt redirection bitmap must immediately follow
    * monContext.task (ASSERT fail otherwise).
    */
   ContextInfo hostContext;                 /* host32 */
   ContextInfo monContext;                  /* vmm32 */
   uint8       interruptRedirectionBitMap[INTERRUPT_REDIRECTION_BITMAP_SIZE]; /* vmm32 */

   /*
    * Most significant bits (10 if the host is not in PAE mode, 11 if the host
    * is in PAE mode) of the linear address of the crosspage, which allow
    * locating the page directory entry associated with the crosspage --hpreg
    */
   uint32 switchLinearRangeIndex;           /* vmm32 */
   uint32 switchL4Off;                      /* vmm64 */
   MPN32  crossMonPageTableMPN;             /* vmm32 */

   /* PDE that points to switch page table */
   VM_PDE  switchPDE;                       /* vmm32 */
   VM_PAE_PDE paeSwitchPDE;                 /* vmm32 */
   VM_L4E lmSwitchPE;                       /* vmm64 */

   /*
    * The monitor may request up to two actions when
    * returning to the host.  The moduleCallType field and
    * args encode a request for some action in the driver.
    * The userCallType field (together with the RPC block)
    * encodes a user call request.  The two requests are
    * independent.  The user call is executed first, with
    * the exception of MODULECALL_TOT_INTR which has a special
    * effect.
    */
   ModuleCallTypeTOT moduleCallType;
   uint32 args[4];
   uint32 retval;

   int userCallType;
   volatile int userCallRequest;       // VCPU/VMX synchronization
   Bool userCallCross;
   Bool userCallRestart;

   /*
    * TRUE if moduleCall was interrupted by signal. Only
    * vmmon uses this field to remember that it should
    * restart RunVM call, nobody else should look at it.
    */
   Bool moduleCallInterrupted;
   Bool runVmm64;

   // host irq relocation values
   int irqRelocateOffset[2];

   /*
    * One 64-bit slot per UCCostStamp entry; written by UCTIMESTAMP()
    * in VMX86_UCCOST builds.  Present in every non-server build.
    */
#if !defined(VMX86_SERVER)
   uint64 ucTimeStamps[UCCOST_MAX];
#endif

   /*
    * The values in the shadow debug registers must match
    * those in the hardware debug register immediately after
    * a task switch in either direction.  They are used to
    * minimize moves to and from the debug registers.
    *
    * Access these through the SHADOW_DR*() macros.
    */
   SharedUReg64 _shadowDR[8];
   Assert_MonSrcLoc switchError;

   SystemCallState systemCall;

   /*
    * Adjustment for machines where the hardware TSC does not run
    * constantly (laptops).  See VMK_SharedData for the ESX analog.
    * Within the monitor, PTSC_Get() = RDTSC() + tscAdjustment.  To
    * handle the case where the TSC slows or stops when the host OS
    * halts the processor using APM or ACPI, tscAdjustment is updated
    * whenever we go to userlevel.  To handle out of sync MP TSCs,
    * tscAdjustment will need to be per-PCPU and thus will need to be
    * updated whenever this VCPU migrates to a different PCPU.
    */
   VmRelativeTS tscAdjustment;

   /*
    * PTSC value and target VCPUSet for the next MonitorPoll callback,
    * as last known by the VCPU that this crosspage belongs to.  To
    * find up-to-date information, vmmon must loop through the
    * crosspages of all the VCPUs in this VM and take the one with the
    * latest time (ugh).  When the time arrives, if a target VCPU
    * thread is in the monitor, it wants to receive a hardware
    * interrupt (e.g., an IPI) as soon as possible; if it has called
    * up to userlevel to halt, it wants to wake up as soon as
    * possible.
    */
   VmAbsoluteTS monitorPollFirstTS;
   VCPUSet monitorPollFirstVCS;

   ContextInfo64 hostContext64;             /* host64 */

   /*
    * Location of crosspage in different address spaces.
    */
   LA32   vmm32CrossPageLA;                 /* vmm32 */
   LA64   vmm64CrossPageLA;                 /* vmm64 */
   LA64   hostCrossPageLA;
   MA64   crosspageMA;

   uint64 hostCR4;
   uint64 hostDR[8];
   uint16 hostDRSaved;             // Host DR spilled to hostDR[x].
   uint16 hostDRInHW;              // 0: shadowDR in h/w, 1: hostDR in h/w.
   Bool debugRegistersReady;       // DR invariants are active, may touch DRs
   Bool restoreGeneralDetect;      // vmmon must restore DR7_GD

   /* NOTE(review): explicit padding, presumably to align monContext64 -- confirm. */
   char _pad4[2];
   ContextInfo64 monContext64;              /* vmm64 */
   LongModeSwitch lmSwitch;                 /* vmm32+host64 or vmm64+host32 */
#ifdef __cplusplus
   /* C++-only helpers; they add no data members to the struct. */

   /* TRUE when the pending module call is MODULECALL_TOT_INTR. */
   Bool isMCIntr(void) { return moduleCallType == MODULECALL_TOT_INTR; }
   /* Returns 6, the same value as MODULECALL_CROSS_PAGE_START. */
   static unsigned int crosspageStart(void) { return 6; }
#endif
} __attribute__ ((packed)) VMCrossPageTOT;


/*
 * NOTE(review): on Windows this presumably pops a structure-packing
 * setting pushed earlier; no matching push include is visible in this
 * header -- confirm where it comes from.
 */
#ifdef WIN32
#include <POPPACK.H>
#endif


/*
 * We hardcode these constants here instead of using the sizeof() and
 * offsetof() to catch compiler inconsistencies. This file is compiled
 * once onsite to build the monitor, and then once at the customer site
 * to build the drivers.
 *
 * An alternate, and preferred, scheme would be to have the makefile
 * generate them, thereby relieving the developer of the error-prone
 * chore of updating them each time VMCrossPage is modified.
 *
 * Pratap 12/01.
 */
/*
 * Hand-maintained constants: 0x9f0 is 2544 bytes.
 * NOTE(review): presumably the expected sizeof(VMCrossPageTOT) and the
 * value stored in its version field -- confirm against the code that
 * checks them.
 */
#define MODULECALL_CROSSPAGE_SIZE_TOT (0x9f0)
#define CROSSPAGE_VERSION_TOT    0x178b

/*
 * Monitor-side (VMM, non-server) constant pointer to the crosspage,
 * formed by converting CROSS_PAGE_START with VPN_2_VA().
 *
 * NOTE(review): neither VMCrossPage nor CROSS_PAGE_START is defined in
 * this header (it defines VMCrossPageTOT and
 * MODULECALL_CROSS_PAGE_START); presumably they come from another
 * header -- confirm.
 */
#if !defined(VMX86_SERVER) && defined(VMM)
#define CROSS_PAGE  ((VMCrossPage * const) VPN_2_VA(CROSS_PAGE_START))
#define VMM_SWITCH_SHARED_DATA CROSS_PAGE
#endif

/*
 * Linear address CPL0_GUARD_PAGE_START pages above MONITOR_LINEAR_START.
 * Both of those constants are defined outside this header.
 */
#define NULLPAGE_LINEAR_START  (MONITOR_LINEAR_START + \
                                PAGE_SIZE * CPL0_GUARD_PAGE_START)

/* NOTE(review): presumably the wait limit applied to user calls -- confirm. */
#define USERCALL_TIMEOUT     100  // milliseconds

/*
 * Wait result codes, listed in ascending order.  Zero and every
 * negative value mean failure; test with MX_ISWAITERROR() rather than
 * comparing against MX_WAITERROR directly.
 */
#define MX_WAITERROR           0
#define MX_WAITNORMAL          1  // Must equal one; see linux module code.
#define MX_WAITTIMEDOUT        2
#define MX_WAITINTERRUPTED     3

// True for any value at or below MX_WAITERROR.
#define MX_ISWAITERROR(e)      (MX_WAITERROR >= (e))

/* Byte offset of member _off within ContextInfo64. */
#define OFF64(_off)   offsetof(ContextInfo64, _off)

#endif
