another directory rename: failstar -> fail

"failstar" sounds like a name for a cruise liner from the 80s.  As "*" isn't a
desirable part of directory names, just name the whole thing "fail/", the core
parts being stored in "fail/core/".

Additionally fixing two build system dependency issues:
 - missing jobserver -> protomessages dependency
 - broken bochs -> fail dependency (add_custom_target DEPENDS only allows plain
   file dependencies ... cmake for the win)


git-svn-id: https://www4.informatik.uni-erlangen.de/i4svn/danceos/trunk/devel/fail@956 8c4709b5-6ec9-48aa-a5cd-a96041d1645a
This commit is contained in:
hsc
2012-03-08 19:43:02 +00:00
commit b70b6fb43a
921 changed files with 473161 additions and 0 deletions

View File

@ -0,0 +1,127 @@
# plex86: run multiple x86 operating systems concurrently
# Copyright (C) 1999-2001 Kevin P. Lawton
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# Toolchain and flags, substituted by ./configure.
CC = @CC@
CFLAGS = @CFLAGS@
LDFLAGS = @LDFLAGS@
# Per-host-OS kernel driver target plus host-side build/clean hooks,
# selected by configure (@HOSTOS@).
KERNEL_TARGET = @KERNEL_TARGET@
HOST_TARGET = @HOST_TARGET@
HOST_CLEAN = @HOST_CLEAN@
srcdir = @srcdir@
VPATH = @srcdir@
LD = ld
HOST_O = @HOST_O@
# extra kernel CFLAGS and LDFLAGS for each host OS
KCFLAGS_LINUX = -fno-strength-reduce -fomit-frame-pointer \
-malign-loops=2 -malign-jumps=2 -malign-functions=2 \
-D__KERNEL__ -I@LINUX_SRC@/include -DCPU=586 -DMODULE
KLDFLAGS_LINUX = -r
KCFLAGS_NULL = -fno-strength-reduce -fomit-frame-pointer \
-malign-loops=2 -malign-jumps=2 -malign-functions=2 \
-D__KERNEL__ -DCPU=586
KLDFLAGS_NULL = -r
KCFLAGS_NETBSD = -fno-strength-reduce -nostdinc -fomit-frame-pointer \
-malign-loops=2 -malign-jumps=2 -malign-functions=2 \
-D_KERNEL -I@NETBSD_SRC@ -I@NETBSD_SRC@/arch -I. -D_LKM
KLDFLAGS_NETBSD = -r
KCFLAGS_FREEBSD = -fno-strength-reduce -nostdinc -fomit-frame-pointer \
-malign-loops=2 -malign-jumps=2 -malign-functions=2 \
-D_KERNEL -I@FREEBSD_SRC@ -I@FREEBSD_SRC@/sys -I. -D_LKM \
-DFREEBSD_PLEX86_DEBUG
KLDFLAGS_FREEBSD = -r
KCFLAGS_BEOS =
KLDFLAGS_BEOS = -nostdlib /boot/develop/lib/x86/_KERNEL_
# Pick the flag set matching the configured host OS.
KLDFLAGS = $(KLDFLAGS_@HOSTOS@)
ALL_CFLAGS = $(CFLAGS) $(KCFLAGS_@HOSTOS@) -I$(srcdir)/include -I$(srcdir)/..
# Suffix rules: build objects from C and assembly with the kernel flags.
.c.o:
	$(CC) -c $(ALL_CFLAGS) $<
.S.o:
	$(CC) -c $(ALL_CFLAGS) -D__ASSEMBLY__ $<
# Default goal: build the host-side pieces, then the kernel driver.
main_target: $(HOST_TARGET) $(KERNEL_TARGET)
	@FULL_LINK@
# Partially link (-r via KLDFLAGS) all monitor objects into the driver.
$(KERNEL_TARGET): $(HOST_O) monitor-host.o \
nexus.o print-mon.o \
util-nexus.o \
fault-mon.o panic-mon.o \
paging-mon.o monitor-mon.o
	$(LD) $(KLDFLAGS) $^ -o $@
clean: $(HOST_CLEAN) main_clean
main_clean:
	/bin/rm -f *.o *.s $(KERNEL_TARGET) a.out
dist-clean: clean
	/bin/rm -f Makefile
# Linux specific targets
linux-target:
linux-clean:
# Null specific targets
null-target:
null-clean:
# NetBSD specific targets
netbsd-target: netbsd-machine
# Symlink the arch headers so <machine/...> includes resolve.
netbsd-machine:
	ln -sf @NETBSD_SRC@/arch/i386/include machine
netbsd-clean:
	/bin/rm -f machine
# FreeBSD specific targets
freebsd-target: freebsd-machine
	[ -r opt_posix.h ] || touch opt_posix.h
freebsd-machine:
	ln -sf @FREEBSD_SRC@/i386/include machine
freebsd-clean:
	/bin/rm -f opt_posix.h
	/bin/rm -f machine
# BeOS specific targets
beos-install: $(KERNEL_TARGET)
	cp -f $(KERNEL_TARGET) /boot/home/config/add-ons/kernel/drivers/bin
	mkdir -p /boot/home/config/add-ons/kernel/drivers/dev/misc
	ln -sf ../../bin/$(KERNEL_TARGET) /boot/home/config/add-ons/kernel/drivers/dev/misc/$(KERNEL_TARGET)
# Regenerate this Makefile when its template or the configure output changes.
Makefile: Makefile.in ../config.status
	cd ..; CONFIG_FILES=kernel/Makefile CONFIG_HEADERS= $(SHELL) config.status

View File

@ -0,0 +1,306 @@
/*
* plex86: run multiple x86 operating systems concurrently
* Copyright (C) 1999-2003 Kevin P. Lawton
*
* fault-mon.c: fault/int handlers for VM monitor - monitor space.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "plex86.h"
#define IN_MONITOR_SPACE
#include "monitor.h"
/* The monitor stack frame. When an exception or interrupt occurrs
* during the execution of either guest or monitor code, the following
* values are pushed.
*
* ss
* esp
* eflags Values pushed by the CPU and interrupt stub. To simplify
* cs things, the stub pushes an error of zero for those
* eip events which don't naturally cause an error push, and
* error also pushes the vector of the exception/interrupt.
* vector
*
* eax
* ecx
* edx General registers, pushed with a PUSHA instruction,
* ebx by code below.
* <esp>
* ebp
* esi
* edi
*
* es
* ds Segment selectors, pushed by code below.
* fs
* gs
*/
void handleMonFault(guest_context_t *monContext);
/* Read CR2, the register the CPU loads with the faulting linear address
 * on a page fault.  CR2 access is a privileged x86 instruction; this is
 * only called from monitor (ring 0) fault-handling code. */
static inline
Bit32u readCR2(void)
{
  Bit32u cr2;
  asm volatile ("movl %%cr2, %0" : "=r" (cr2));
  return( cr2 );
}
/* Low-level entry stubs for faults and hardware interrupts.  These build
 * the guest_context_t frame documented at the top of this file (segment
 * registers + PUSHA on top of the CPU/stub-pushed vector/error/iret frame)
 * and then dispatch to the C handlers.  Statement order here is the ABI
 * between stub and handler -- do not reorder. */
asm (
".text \n\t"
 /* __handle_fault: This is called by all of the monitor's fault handler
  * stubs. A fault could have originated from execution of the guest
  * (due to virtualization conditions or natural fault generation) or
  * from the monitor (currently only due to bugs in the monitor).
  */
".globl __handle_fault \n\t"
"__handle_fault: \n\t"
" pushal \n\t" /* Save general registers */
" pushl %es \n\t" /* Save segment registers */
" pushl %ds \n\t"
" pushl %fs \n\t"
" pushl %gs \n\t"
" movl 60(%esp), %eax \n\t" /* CS pushed by CPU from fault */
" andl $3, %eax \n\t" /* Check CS.RPL bits */
" jz __fault_from_mon \n\t" /* RPL0 means from monitor */
 /* We have determined that the fault was from guest code. Prepare
  * to call the monitor C code to do most of the fault handling.
  */
"__fault_from_guest: \n\t"
" movl %ss, %eax \n\t" /* Copy SS into DS/ES */
" movl %eax, %ds \n\t"
" movl %eax, %es \n\t"
" cld \n\t" /* gcc-compiled code needs this */
" pushl %esp \n\t" /* Push pointer to saved guest context for C call.*/
" call handleGuestFault\n\t" /* Call the C monitor fault handler. */
" addl $4, %esp \n\t" /* Remove arg from stack. */
".globl __ret_to_guest \n\t" /* Fault handled, work back to guest. */
"__ret_to_guest: \n\t"
 /* Return to the guest. Restore registers from the monitor stack. */
" popl %gs \n\t" /* Restore guest segments */
" popl %fs \n\t"
" popl %ds \n\t"
" popl %es \n\t"
" popal \n\t" /* Restore guest general registers */
" addl $8, %esp \n\t" /* Ignore vector and error dwords */
" iret \n\t" /* Resume execution of guest */
"__fault_from_mon: \n\t"
" cld \n\t" /* gcc-compiled code needs this */
" pushl %esp \n\t" /* Push pointer to context. */
" call handleMonFault \n\t" /* Call C code for real work */
" addl $4, %esp \n\t"
 /* Return to monitor. Restore state from the monitor stack. */
"__ret_to_monitor: \n\t"
" popl %gs \n\t" /* Restore monitor segments */
" popl %fs \n\t"
" popl %ds \n\t"
" popl %es \n\t"
" popal \n\t" /* Restore monitor general registers */
" addl $8, %esp \n\t" /* ignore vector and error dwords */
" iret \n\t" /* Resume execution of monitor */
 /*
  * Hardware interrupt handler stub.  handleInt() returns 1 if the
  * interrupt arrived while monitor code was running, 0 for guest code;
  * that return value selects which restore path to take.
  */
".globl __handle_int \n\t" /* Return to monitor code */
"__handle_int: \n\t"
" pushal \n\t" /* Save guest general registers */
" pushl %es \n\t" /* Save guest segment registers */
" pushl %ds \n\t"
" pushl %fs \n\t"
" pushl %gs \n\t"
" movl %ss, %eax \n\t" /* Copy SS into DS/ES */
" movl %eax, %ds \n\t"
" movl %eax, %es \n\t"
" cld \n\t" /* gcc-compiled code needs this */
" pushl %esp \n\t"
" call handleInt \n\t" /* monitor interrupt handler */
" addl $4, %esp \n\t"
" cmpl $0x1, %eax \n\t" /* Was interrupt generated from monitor code? */
" je __ret_to_monitor\n\t" /* Yes, so return to monitor code */
" jmp __ret_to_guest \n\t" /* No, so return to guest code */
);
unsigned
handleInt(guest_context_t *context)
/*
 * handleInt(): Redirect a hardware interrupt back to the host.
 * Returns 1 if the interrupt arrived while monitor code was executing,
 * 0 if it arrived while guest code was executing (the asm stub uses
 * this to pick the restore path).
 */
{
  /* The saved context lives on the nexus page; masking off the low 12
   * bits of its address recovers the page-aligned nexus_t.
   * NOTE(review): assumes the monitor stack and nexus share one page --
   * confirm against the nexus layout. */
  nexus_t *nexus = (nexus_t *) (((Bit32u) context) & 0xfffff000);
  vm_t *vm = (vm_t *) nexus->vm;
  unsigned from_monitor;
  Bit64u t1;
  t1 = vm_rdtsc();
  /* CS.RPL==3 in the saved frame means guest code was interrupted. */
  if ( (context->cs & 0x0003) == 0x0003 ) {
    /* End of elapsed guest execution duration. Add elapsed */
    /* cycles to time framework. */
    vm->system.cyclesElapsed += (t1 - vm->system.t0);
    from_monitor = 0; /* Event from guest code */
    }
  else {
    from_monitor = 1; /* Event from monitor code */
    }
  /* Interrupts are off naturally here. */
  /* Ask the host side to re-deliver this vector, then switch back to
   * the host context so the host OS can service the IRQ. */
  vm->mon_request = MonReqRedirect;
  vm->redirect_vector = context->vector;
  vm->guest.__mon2host();
  return(from_monitor);
}
void
handleGuestFault(guest_context_t *context)
/* Handle a fault from the guest. Called from the assembly stub
 * __handle_fault.  Dispatches on the exception vector: some faults are
 * forwarded to the host (toHostGuestFault), page faults go to the
 * paging code, everything unimplemented panics the monitor.
 */
{
  /* Recover the page-aligned nexus from the context address (the saved
   * context lives on the nexus page). */
  nexus_t *nexus = (nexus_t *) (((Bit32u) context) & 0xfffff000);
  vm_t *vm = (vm_t *) nexus->vm;
  Bit32u cr2 = readCR2(); /* faulting linear address, only used for #PF */
  Bit64u t1;
  /* End of elapsed guest execution duration */
  t1 = vm_rdtsc();
  vm->system.cyclesElapsed += (t1 - vm->system.t0);
#warning "Delete these checks"
#if ANAL_CHECKS
  if ( !context->eflags.fields.if_ )
    monpanic(vm, "handleGuestFault: guest IF=0.\n");
  if ( context->eflags.fields.vm )
    monpanic(vm, "handleGuestFault: eflags.VM=1.\n");
#endif
  /* Accounting done; re-enable interrupts for the rest of the handler. */
  STI();
  switch ( context->vector ) {
    case ExceptionDB: /* 1 */
      monpanic(vm, "handleGuestFault: #DB, method=%u not coded\n",
               vm->executeMethod);
#if 0
      if (vm->executeMethod == RunGuestNMethodBreakpoint) {
        /* Breakpoint generated because we requested it via TF=1 */
        }
      else {
        monpanic(vm, "handleGuestFault: #DB, method=%u not coded\n",
                 vm->executeMethod);
        }
#endif
      break;
    case ExceptionBR: /* 5 */
      monpanic(vm, "handleGuestFault: BR unfinished.\n");
      /* BOUND instruction fault; array index not in bounds */
      monpanic(vm, "handleGuestFault: emulate_exception was here.\n");
      /*emulate_exception(vm, context->vector, 0);*/
      break;
    /* These faults are handed back to the host-side code. */
    case ExceptionDE: /* 0 */
    case ExceptionBP: /* 3 */
    case ExceptionOF: /* 4 */
    case ExceptionNM: /* 7 */
    case ExceptionMF: /* 16 */
      toHostGuestFault(vm, context->vector);
      /*monpanic(vm, "handleGuestFault: DE/BP/OF/NM/MF unfinished.\n");*/
      /*monpanic(vm, "handleGuestFault: %u\n", context->vector);*/
      /* emulate_interrupt(vm, context->vector); */
      break;
    case ExceptionNP: /* 11 */
    case ExceptionSS: /* 12 */
    case ExceptionAC: /* 17 */
      monpanic(vm, "handleGuestFault: NP/SS/AC unfinished.\n");
      /* use emulate_xyz() */
      /*interrupt(vm, context->vector, 0, 1, context->error); */
      monpanic(vm, "handleGuestFault: %u\n", context->vector);
      break;
    case ExceptionUD: /* 6 */
    case ExceptionGP: /* 13 */
      toHostGuestFault(vm, context->vector);
      break;
    case ExceptionPF: /* 14 */
      /* Page fault: let the monitor paging code resolve it. */
      guestPageFault(vm, context, cr2);
      break;
    default:
      monpanic(vm, "handleGuestFault: Unhandled Fault: %u\n", context->vector);
      break;
    }
}
/* Handle a fault raised while the monitor itself was executing (only
 * expected on monitor bugs).  Guarded against recursion via
 * vm->inMonFault. */
void
handleMonFault(guest_context_t *monContext)
{
  /* Recover the page-aligned nexus from the context address. */
  nexus_t *nexus = (nexus_t *) (((Bit32u) monContext) & 0xfffff000);
  vm_t *vm = (vm_t *) nexus->vm;
  if (vm->inMonFault)
    monpanic(vm, "handleMonFault called recursively.\n");
  vm->inMonFault = 1;
  /* NOTE(review): monpanic() is invoked unconditionally here, before the
   * switch below -- if monpanic does not return, the rest of this
   * function is dead code.  Confirm monpanic semantics. */
  monpanic(vm, "handleMonFault: vector=%u\n", monContext->vector);
  /* Fault occurred inside monitor code. */
  switch ( monContext->vector ) {
    case ExceptionPF:
    case ExceptionGP:
      {
      Bit32u cr2;
      /*unsigned us, rw;*/
      cr2 = readCR2(); /* read before STI so CR2 can't be clobbered */
      STI();
      if (monContext->error & 0x8) /* If RSVD bits used in PDir */
        monpanic(vm, "handleMF: RSVD\n");
      /*us = G_GetCPL(vm)==3;*/
      /*rw = (monContext->error >> 1) & 1;*/
      monpanic(vm, "handleMF: \n");
      break;
      }
    default:
      monpanic(vm, "hMF: vector=%u\n", monContext->vector);
      break;
    }
  /*vm->abort_code = 1;*/
  /*monpanic_nomess(vm);*/
  CLI();
  vm->inMonFault = 0;
}

View File

@ -0,0 +1,6 @@
# FreeBSD Kernel module makefile
# Wraps the pre-linked monitor object (../plex86.o) into a loadable
# kernel module using the stock bsd.kmod.mk infrastructure.
SRCS= ../plex86.o
KMOD= plex86
.include <bsd.kmod.mk>

View File

@ -0,0 +1,219 @@
#include <Drivers.h>
#include <KernelExport.h>
#include <OS.h>
#include <SupportDefs.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "plex86.h"
#include "monitor.h"
/* Read the EFLAGS register via pushfl/popl (GCC statement expression). */
#define read_flags() ({ \
unsigned int __dummy; \
__asm__( \
"pushfl\n\t" \
"popl %0\n\t" \
:"=r" (__dummy)); \
__dummy; \
})
/* Load EFLAGS from x via push/popfl. */
#define write_flags(x) \
__asm__("push %0\n\tpopfl\n\t": :"r" (x))
/* Per-open-file driver state. */
struct cookie {
  sem_id sem;          /* mutex serializing monitor init/run/teardown */
  int mon_ok;          /* nonzero once init_monitor() succeeded (ioctl 0x6b02) */
  uint32 irq_cnt[17];  /* per-IRQ redirect counters, indexed by IRQ number */
};
/* open() hook: allocate the per-fd cookie and its mutex semaphore.
 * Returns B_OK on success, ENOMEM if the cookie can't be allocated, or
 * the create_sem() error code. */
static status_t
driver_open(const char *name, ulong flags, void **_cookie)
{
  struct cookie *cookie;
  status_t err;

  /* calloc takes (nmemb, size); the original had the arguments swapped.
   * Same zeroed allocation either way, but this is the correct order. */
  cookie = calloc(1, sizeof(*cookie));
  if (!cookie)
    return ENOMEM;
  cookie->sem = create_sem(1, "plex86 mutex");
  if (cookie->sem < 0) {
    err = cookie->sem;  /* negative sem_id doubles as the error code */
    goto err1;
  }
  *_cookie = cookie;
  return B_OK;

err1:
  free(cookie);
  return err;
}
/* close() hook: nothing to do; state is torn down in driver_free(). */
static status_t
driver_close(void *cookie)
{
  return B_OK;
}
/* free() hook: release the semaphore and cookie allocated in open(). */
static status_t
driver_free(void *_cookie)
{
  struct cookie *cookie = (struct cookie *)_cookie;
  delete_sem(cookie->sem);
  free(cookie);
  return B_OK;
}
/* read() hook: no-op stub; reports success without touching *count.
 * NOTE(review): *count is not set to 0 -- confirm BeOS devfs semantics. */
static status_t
driver_read(void *cookie, off_t pos, void *buf, size_t *count)
{
  return B_OK;
}
/* write() hook: no-op stub; reports success without touching *count. */
static status_t
driver_write(void *cookie, off_t pos, const void *buf, size_t *count)
{
  return B_OK;
}
/* ioctl() hook: all VM control goes through here.  Commands (0x6b02..05)
 * mirror the Linux driver's numbering.  Every command path is serialized
 * on cookie->sem.  Returns B_OK, a POSIX errno, or ENOSYS for unknown
 * commands. */
static status_t
driver_ioctl(void *_cookie, ulong cmd, void *buf, size_t len)
{
  struct cookie *cookie = (struct cookie *)_cookie;
  /* NOTE(review): cr0 and arg are assigned/declared but never used in any
   * visible command path; arg also dereferences buf unconditionally, even
   * for commands that may pass no buffer -- confirm callers always pass one. */
  uint32 cr0, arg, eflags_orig;
  uchar soft_int_vector;
  status_t err;
  arg = *(uint32 *)buf;
  switch (cmd) {
  /* Allocate unpaged memory for the VM. */
  /* arg is the number of megabytes to allocate */
  /* Memory returned must not be pageable by the */
  /* host OS, since the VM monitor will run in this */
  /* memory as well. Perhaps later, we can let */
  /* the guest OS run in paged memory and reflect */
  /* the page faults back to the host OS. */
  case 0x6b02:
    acquire_sem(cookie->sem);
    init_monitor(MASTER_PIC_BASE_VECTOR, SLAVE_PIC_BASE_VECTOR, IRQ16_BASE_VECTOR);
    cookie->mon_ok = 1; /* monitor now runnable (checked by 0x6b04) */
    release_sem(cookie->sem);
    return B_OK;
  case 0x6b03:
    /* linux-specific hack, unnecessary under BeOS */
    return B_OK;
  /* run guest context for a time slice */
  case 0x6b04:
    {
    cpu_status ps;
    acquire_sem(cookie->sem);
    if (!cookie->mon_ok) {
      release_sem(cookie->sem);
      return EPERM;
      }
    /* Enter the monitor with interrupts disabled and NT/IF/TF clear;
     * restore the original flags (minus IF) on the way back out. */
    ps = disable_interrupts();
    /* clear NT/IF/TF */
    eflags_orig = read_flags();
    write_flags(eflags_orig & ~0x00004300);
    __host2guest();
    write_flags(eflags_orig & ~0x00000200);
    restore_interrupts(ps);
    /* Why did the monitor hand control back to us? */
    switch ( monitor_info.ret_because ) {
      case RET_BECAUSE_IRQ:
        /* reported vector is actually the IRQ# */
        /* Re-inject the hardware interrupt into the host kernel. */
        soft_int_vector = MASTER_PIC_BASE_VECTOR + monitor_info.vector;
        soft_int(soft_int_vector);
        cookie->irq_cnt[monitor_info.vector]++;
        dprintf("plex86: irq %u\n", monitor_info.vector);
        err = B_OK;
        break;
      case RET_BECAUSE_INT:
        dprintf("plex86: int %u\n", monitor_info.vector);
        err = EFAULT;
        break;
      case RET_BECAUSE_EXC:
        dprintf("plex86: exc %u\n", monitor_info.vector);
        err = EFAULT;
        break;
      case RET_BECAUSE_TEST:
        dprintf("plex86: test\n");
        err = B_OK;
        break;
      default:
        dprintf("plex86: unknown ret_because\n");
        err = B_OK;
        break;
      }
    release_sem(cookie->sem);
    return err;
    }
  case 0x6b05: /* tear down VM environment */
    acquire_sem(cookie->sem);
    cookie->mon_ok = 0;
    release_sem(cookie->sem);
    return B_OK;
  }
  return ENOSYS;
}
/* BeOS device hook table; order is fixed by the device_hooks layout:
 * open, close, free, control (ioctl), read, write. */
device_hooks driver_device = {
  driver_open,
  driver_close,
  driver_free,
  driver_ioctl,
  driver_read,
  driver_write
};
/* Module-load hook: no global setup needed. */
status_t
init_driver (void)
{
  return B_OK;
}
/* Module-unload hook: nothing to tear down. */
void
uninit_driver(void)
{
}
/* Tell the BeOS devfs which device names this driver provides.
 * The list is static and NULL-terminated. */
const char **
publish_devices(void)
{
  static const char *names[] = { "misc/plex86", NULL };

  return (const char **) names;
}
/* Return the hook table for any published device name (we only have one). */
device_hooks *
find_device(const char *name)
{
  return &driver_device;
}

View File

@ -0,0 +1,569 @@
/*
* plex86: run multiple x86 operating systems concurrently
*
* Copyright (C) 2000 Frank van der Linden (fvdl@wasabisystems.com)
* Copyright (C) 2000 Alexander Langer <alex@big.endian.de>
*
* License as published by the Free Software Foundation; either version 2 of
* the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library; if not, write to the Free Software Foundation,
* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#define DIAGNOSTIC 1
#define CDEV_MAJOR 20
#define timer_t __bsd_timer_t
#define write_eflags __freebsd_write_eflags
#define read_eflags __freebsd_read_eflags
/* XXX recheck, which includes are needed */
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/conf.h>
#include <sys/exec.h>
#include <sys/malloc.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/signalvar.h>
#include <sys/mman.h>
#include <sys/kernel.h>
#include <sys/linker.h>
#include <sys/sysproto.h>
#include <sys/module.h>
#include <vm/vm.h>
#include <machine/cpu.h>
#undef timer_t
#undef write_eflags
#undef read_eflags
#include "plex86.h"
#define IN_HOST_SPACE
#include "monitor.h"
static MALLOC_DEFINE(M_PLEX86, "plex86", "Plex86 mem");
static d_open_t plex86_open;
static d_close_t plex86_close;
static d_mmap_t plex86_mmap;
static d_ioctl_t plex86_ioctl;
static unsigned retrieve_phy_pages(Bit32u *, int, void *, unsigned, int);
static vm_t *find_vm(struct proc * p);
static void register_vm(vm_t * vm, struct proc * p);
static void unregister_all(struct proc * p);
/* Character device switch for /dev/plex86 (old-style FreeBSD cdevsw with
 * explicit bmaj; unimplemented entry points use the no-op defaults). */
static struct cdevsw plex86_cdevsw = {
/* open */ plex86_open,
/* close */ plex86_close,
/* read */ noread,
/* write */ nowrite,
/* ioctl */ plex86_ioctl,
/* poll */ nopoll,
/* mmap */ plex86_mmap,
/* strat */ nostrategy,
/* name */ "plex86",
/* major */ CDEV_MAJOR,
/* dump */ nodump,
/* psize */ nopsize,
/* flags */ 0,
/* bmaj */ -1
};
/* For use with make_dev/destroy_dev */
static dev_t plex86_dev;
/* Driver soft state: counts opens (checked at MOD_UNLOAD). */
static struct plex86_softc {
int sc_open;
} plex86sc;
/* Physical pages backing the loaded module image itself (filled in at
 * MOD_LOAD from the linker_file info). */
monitor_pages_t monitor_pages;
/*
 * Hash table stuff to maintain proc <-> vm mapping. 23 entries should be
 * plenty.. unless someone plans to run more than 23 guest OSs..
 *
 * Note that a process can only open the device once with this scheme.
 */
LIST_HEAD(plex86_hashhead, plex86_vmentry);
/* One hash bucket entry: owning process id -> its vm_t. */
struct plex86_vmentry {
pid_t vm_pid;
vm_t *vm_vm;
LIST_ENTRY(plex86_vmentry) vm_entry;
};
struct plex86_hashtbl / mask are set up by hashinit() at MOD_LOAD. */
struct plex86_hashhead *plex86_hashtbl;
u_long plex86_hashmask;
#define PLEX86_VMHASHSIZE 23
#define PLEX86_VMHASH(p) ((u_long)((p)->p_pid) & plex86_hashmask)
/* open() entry: root-only.  Allocates and registers a fresh vm_t for the
 * opening process; a process that already has one gets EBUSY. */
static int
plex86_open(dev_t dev, int flags, int fmt, struct proc * p)
{
	vm_t *vm;
	if (suser_xxx(p->p_ucred, p, p->p_acflag) != 0)
		return (EPERM);
	vm = find_vm(p);
	if (vm == NULL) {
		/* M_WAITOK: malloc sleeps rather than fail, so the NULL
		 * check below is belt-and-braces. */
		vm = malloc(sizeof(vm_t), M_PLEX86, M_WAITOK);
		if (vm == NULL)
			return EIO;
		memset(vm, 0, sizeof(vm_t));
		register_vm(vm, p);
		plex86sc.sc_open++;
	} else
		return (EBUSY);
	/* Kernel independent device open code. */
	hostDeviceOpenInit(vm);
#ifdef FREEBSD_PLEX86_DEBUG
	printf("plex86: pid %u opened device, vm %p\n", p->p_pid, vm);
#endif
	return (0);
}
/* close() entry: drop every VM registered to this process.
 * NOTE(review): open() increments sc_open but this resets it to 0 --
 * inconsistent if multiple processes could hold the device; confirm. */
int
plex86_close(dev_t dev, int flags, int fmt, struct proc * p)
{
	unregister_all(p);
	plex86sc.sc_open = 0;
#ifdef FREEBSD_PLEX86_DEBUG
	printf("plex86: pid %u closed device\n", p->p_pid);
#endif
	return (0);
}
/* mmap() entry: translate a device offset to a physical page number.
 * Offsets below guest memory map guest pages; read-only offsets past the
 * end map the monitor's log buffer. */
int
plex86_mmap(dev_t dev, vm_offset_t offset, int nprot)
{
	struct proc *p = curproc;
	int page;
	vm_offset_t endguestoff;
	vm_t *vm;
	vm = find_vm(p);
	if (vm == NULL)
		return (ENXIO);
#warning "kludge to mmap message buffer"
	endguestoff = (vm_offset_t) (vm->pages.guest_n_megs * 1024 * 1024);
	if (offset >= endguestoff && nprot == PROT_READ) {
		page = (offset - endguestoff) / PAGE_SIZE;
		return (vm->pages.log_buffer[page]);
	}
	page = offset / PAGE_SIZE;
	/* NOTE(review): 'page > guest_n_pages' admits page == guest_n_pages;
	 * if valid indices are 0..guest_n_pages-1 this is an off-by-one --
	 * confirm against the guest[] array size. */
	if (page < 0 || page > vm->pages.guest_n_pages) {
		log(LOG_WARNING, "plex86: mmap: offset %lx out of range\n",
		    (unsigned long) offset);
		return -1;
	}
	return vm->pages.guest[page];
}
/*
 * ioctl() entry: all VM configuration and control.  Returns 0 on success
 * or a positive errno (FreeBSD convention -- every path here returns
 * positive errno values).
 */
int
plex86_ioctl(dev_t dev, u_long cmd, caddr_t data, int flags,
	     struct proc * p)
{
	int error;
	vm_t *vm;
	vm = find_vm(p);
	if (vm == NULL)
		return EINVAL;
	switch (cmd) {
	case PLEX86_ALLOCVPHYS:
	{
		unsigned arg = *((unsigned *) data);
		guest_cpu_t guest_cpu;
		/* Only valid once, before any guest memory exists. */
		if (vm->mon_state != MON_STATE_UNINITIALIZED ||
		    vm->pages.guest_n_megs != 0)
			return EBUSY;
		/* 4..PLEX86_MAX_PHY_MEGS megabytes, multiple of 4. */
		if (arg > PLEX86_MAX_PHY_MEGS || arg < 4 || (arg & ~0x3) != arg)
			return EINVAL;
		/* Allocate memory */
		error = allocVmPages(vm, arg);
		if (error != 0) {
			log(LOG_WARNING, "plex86: allocVmPages failed (%d)\n",
			    error);
			return ENOMEM;
		}
		if (init_guest_phy_mem(vm) != 0) {
			log(LOG_ERR, "plex86: init_guest_phy_mem failed\n");
			unallocVmPages(vm);
			return EFAULT;
		}
		getCpuResetValues(&guest_cpu);
		log(LOG_WARNING, "plex86: cpu.cr0 = 0x%x\n", guest_cpu.cr0);
		if (!init_monitor(vm, 0, 0, &guest_cpu) ||
		    !setGuestCPU(vm, 0, &guest_cpu) ||
		    !mapMonitor(vm, guest_cpu.eflags, 0)) {
			log(LOG_ERR, "plex86: init_monitor failed\n");
			unallocVmPages(vm);
			return EFAULT;
		}
		break;
	}
	case PLEX86_TEARDOWN:
		unallocVmPages(vm);
		break;
	case PLEX86_ALLOCINT:
		return EINVAL;
	case PLEX86_RELEASEINT:
		return EINVAL;
	case PLEX86_PRESCANDEPTH:
	{
		unsigned long arg = *(unsigned long *) data;
		if ((arg < PrescanDepthMin) || (arg > PrescanDepthMax)) {
			log(LOG_WARNING, "plex86: Requested prescan depth %lu"
			    " out of range [%u..%u]\n", arg, PrescanDepthMin,
			    PrescanDepthMax);
			return EINVAL;
		}
		vm->prescanDepth = (unsigned) arg;
		break;
	}
	case PLEX86_SETINTR:
		ioctlSetIntr(vm, *(unsigned long *) data);
		break;
	case PLEX86_SET_A20:
	{
		unsigned long arg = *(unsigned long *) data;
		if (!ioctlSetA20E(vm, arg))
			return EINVAL;
		break;
	}
	case PLEX86_MESSAGEQ:
	{
		/* data holds a userland pointer to a vm_messages_t: copy in
		 * the header, then the payload, run the queue, copy back. */
		vm_messages_t msg;
		if (vm->mon_state != MON_STATE_RUNNABLE)
			return EINVAL;
		error = copyin(*(void **) data, &msg.header, sizeof msg.header);
		if (error != 0)
			return error;
		if ((msg.header.msg_len + sizeof(msg.header)) > sizeof(msg))
			return EINVAL;
		if (msg.header.msg_len != 0) {
			error = copyin(&((vm_messages_t *) * (void **) data)->msg,
			    &msg.msg, msg.header.msg_len);
			if (error != 0)
				return error;
		}
		if (ioctlMessageQ(vm, &msg)) {
			log(LOG_WARNING, "plex86: ioctlMessageQ failed\n");
			return EINVAL;
		}
		error = copyout(&msg, *(void **) data,
		    sizeof(msg.header) + msg.header.msg_len);
		return error;
	}
	case PLEX86_RESET:
		break;
	case PLEX86_PHYMEM_MOD:
		break;
	case PLEX86_FORCE_INT:
		if (vm->mon_state != MON_STATE_RUNNABLE)
			/* Was '-EINVAL' (Linux-style negative errno); FreeBSD
			 * ioctl handlers return positive errno values, as every
			 * other path in this function does. */
			return EINVAL;
		/* NOTE(review): this ORs the kernel *pointer* value into
		 * dbg_force_int; every other command dereferences data.
		 * Probably should be *(unsigned long *) data -- confirm
		 * against the userland side before changing. */
		vm->dbg_force_int = 0x100 | (unsigned) data;
		break;
	case PLEX86_PRESCANRING3:
	{
		unsigned long arg = *(unsigned long *) data;
		if (arg > PrescanRing3On) {
			log(LOG_WARNING,
			    "plex86: Requested PrescanRing3 val(%lu) OOB\n",
			    arg);
			return EINVAL;
		}
		vm->prescanRing3 = arg;
		break;
	}
	default:
		log(LOG_WARNING, "plex86: unknown ioctl %lx\n", cmd);
		return EINVAL;
	}
	return 0;
}
/* Insert (pid -> vm) into the hash table.  With DIAGNOSTIC, panics on a
 * duplicate registration for the same pid. */
static void
register_vm(vm_t * vm, struct proc * p)
{
	struct plex86_hashhead *php;
	struct plex86_vmentry *vhp;
	php = &plex86_hashtbl[PLEX86_VMHASH(p)];
#if DIAGNOSTIC
	for (vhp = php->lh_first; vhp != NULL; vhp = vhp->vm_entry.le_next) {
		if (vhp->vm_pid == p->p_pid)
			panic("plex86: vm already registered, pid %u\n",
			    p->p_pid);
	}
#endif
	vhp = malloc(sizeof(struct plex86_vmentry), M_PLEX86, M_WAITOK);
	vhp->vm_pid = p->p_pid;
	vhp->vm_vm = vm;
	LIST_INSERT_HEAD(php, vhp, vm_entry);
}
/* Remove the entry for this process and free both the vm_t and the hash
 * entry.  Safe: the loop breaks immediately after freeing vhp. */
static void
unregister_vm(vm_t * vm, struct proc * p)
{
	struct plex86_hashhead *php;
	struct plex86_vmentry *vhp;
	php = &plex86_hashtbl[PLEX86_VMHASH(p)];
	for (vhp = php->lh_first; vhp != NULL; vhp = vhp->vm_entry.le_next) {
		if (vhp->vm_pid == p->p_pid) {
			LIST_REMOVE(vhp, vm_entry);
			free(vhp->vm_vm, M_PLEX86);
			free(vhp, M_PLEX86);
			break;
		}
	}
}
/* Remove and free every hash entry in this process's bucket.
 * Fixed a use-after-free: the original for-loop advanced via
 * vhp->vm_entry.le_next *after* free(vhp); the successor is now saved
 * before the entry is freed.  Also dropped an unused local and a
 * redundant NULL check (vhp is non-NULL inside the loop). */
static void
unregister_all(struct proc * p)
{
	struct plex86_hashhead *php;
	struct plex86_vmentry *vhp, *next;
	php = &plex86_hashtbl[PLEX86_VMHASH(p)];
	if (php == NULL)
		return;
	for (vhp = php->lh_first; vhp != NULL; vhp = next) {
		/* Save the successor before vhp is unlinked and freed. */
		next = vhp->vm_entry.le_next;
#ifdef FREEBSD_PLEX86_DEBUG
		printf("plex86: unregister vm %p, pid %u\n",
		    vhp->vm_vm, vhp->vm_pid);
#endif
		LIST_REMOVE(vhp, vm_entry);
		if (vhp->vm_vm != NULL)
			free(vhp->vm_vm, M_PLEX86);
		free(vhp, M_PLEX86);
	}
}
/* Look up the vm_t registered for this process, or NULL if none. */
static vm_t *
find_vm(struct proc * p)
{
	struct plex86_hashhead *php;
	struct plex86_vmentry *vhp;
	php = &plex86_hashtbl[PLEX86_VMHASH(p)];
	for (vhp = php->lh_first; vhp != NULL; vhp = vhp->vm_entry.le_next) {
		if (vhp->vm_pid == p->p_pid)
			return vhp->vm_vm;
	}
	return NULL;
}
/* Fill page[] with the physical page numbers backing the kernel-virtual
 * range [addr_v, addr_v+size).  If 'aligned' is set, addr_v must already
 * be page-aligned; otherwise it is rounded down.  Returns the number of
 * pages stored, or 0 on error. */
static unsigned
retrieve_phy_pages(Bit32u * page, int max_pages, void *addr_v, unsigned size,
    int aligned)
{
	Bit32u start_addr;
	unsigned n_pages, i;
	if (!aligned)
		start_addr = (Bit32u) addr_v & ~(PAGE_SIZE - 1);
	else {
		start_addr = (Bit32u) addr_v;
		if (start_addr & (PAGE_SIZE - 1)) {
			log(LOG_WARNING, "plex86: retrieve_phy_pages: address "
			    "%p not aligned\n", addr_v);
			return 0;
		}
	}
	n_pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
	/* NOTE(review): unsigned n_pages vs. int max_pages comparison -- a
	 * negative max_pages would convert to a huge unsigned value. */
	if (n_pages > max_pages) {
		log(LOG_WARNING, "plex86: retrieve_phy_pages: page list "
		    "too small\n");
		return (0);
	}
	for (i = 0; i < n_pages; i++) {
		/* kvtop: kernel-virtual -> physical address. */
		page[i] = kvtop((vm_offset_t) start_addr) / PAGE_SIZE;
		start_addr += PAGE_SIZE;
	}
	return n_pages;
}
/* Yield the CPU to the host scheduler between guest time slices.
 * Returns nonzero while no signal is pending for the current process
 * (i.e. "keep running"). */
unsigned
host_idle(void)
{
#if defined(want_resched)
	if (want_resched) {
#endif
		yield(curproc, NULL);	/* XXX */
		need_resched();	/* XXX */
#if defined(want_resched)
	}
#endif
	printf("resched done\n");
	return (CURSIG(curproc) == 0);
}
/* Allocate kernel memory for the monitor (may sleep: M_WAITOK). */
void *
host_alloc(unsigned long size)
{
	/*
	 * XXX - it wants this page-aligned apparently.
	 */
	/* NOTE(review): rounding small requests up to PAGE_SIZE relies on
	 * malloc(9) page-aligning page-sized allocations -- confirm. */
	if (size <= (PAGE_SIZE / 2))
		size = PAGE_SIZE;
	return (malloc(size, M_PLEX86, M_WAITOK));
}
/* Free memory obtained from host_alloc(). */
void
host_free(void *ptr)
{
	free(ptr, M_PLEX86);
}
/* Record the physical pages backing an (already page-aligned) buffer.
 * Returns the page count, or 0 on error. */
unsigned
host_map(Bit32u * page, int max_pages, void *ptr, unsigned size)
{
	return retrieve_phy_pages(page, max_pages, ptr, size, 1);
}
/* Allocate one kernel page for the monitor (may sleep: M_WAITOK). */
void *
host_alloc_page(void)
{
	return malloc(PAGE_SIZE, M_PLEX86, M_WAITOK);
}
/* Return a page obtained from host_alloc_page() to the kernel allocator.
 * (The original wrote 'return free(...)' -- returning a void expression
 * from a void function, which ISO C forbids; plain statement now.) */
void
host_free_page(void *ptr)
{
	free(ptr, M_PLEX86);
}
/* Translate a kernel-virtual page pointer into its physical page number
 * via kvtop(); NULL maps to 0. */
Bit32u
host_map_page(void *ptr)
{
	Bit32u u;
	if (ptr == NULL)
		return 0;
	u = kvtop(ptr) / PAGE_SIZE;
#if FREEBSD_PLEX86_DEBUG
	printf("host_map_page(%p) -> %x\n", ptr, u);
#endif
	return u;
}
/* printf-style logging hook for the kernel-independent monitor code:
 * format into a bounded local buffer and emit it via the kernel log at
 * LOG_WARNING priority.
 * Fixes: the original never called va_end() (required to pair with
 * va_start by the C standard), and declared the buffer as unsigned char
 * while vsnprintf takes char *. */
void
hostprint(char *fmt,...)
{
	va_list args;
	int ret;
	char buffer[256];
	va_start(args, fmt);
	ret = vsnprintf(buffer, sizeof(buffer), fmt, args);
	va_end(args);
	if (ret == -1)
		log(LOG_WARNING,
		    "plex86: hostprint: vsnprintf returns error.\n");
	else
		log(LOG_WARNING, "plex86: %s\n", buffer);
}
/* Module load/unload handler.  On MOD_LOAD: locate the module's own
 * physical pages (the monitor runs from them), set up the pid->vm hash,
 * init the kernel-independent host code, and create /dev/plex86.  On
 * MOD_UNLOAD: refuse while open, then tear everything down. */
static int
plex86_modevent(module_t mod, int type, void *data)
{
	linker_file_t lf;
	int error = 0;
	switch (type) {
	case MOD_LOAD:
		plex86_hashtbl = NULL;
		lf = linker_find_file_by_name("plex86");
		if (lf == NULL) {
			printf("plex86: can't find linker_file 'plex86'\n");
			return (ENXIO);
		}
		monitor_pages.startOffset = lf->address;
		monitor_pages.startOffsetPageAligned =
		    monitor_pages.startOffset & 0xfffff000;
		if ((monitor_pages.n_pages = retrieve_phy_pages(monitor_pages.page,
		    PLEX86_MAX_MONITOR_PAGES,
		    lf->address,
		    lf->size,
		    0)) == 0) {
			log(LOG_WARNING, "plex86: could not store physical "
			    "addresses for monitor pages\n");
			return (ENXIO);
		}
		plex86_hashtbl = hashinit(PLEX86_VMHASHSIZE,
		    M_PLEX86, &plex86_hashmask);
		/* NOTE(review): on hostModuleInit() failure, 'error' is set to
		 * EINVAL but the unconditional 'return (0)' below swallows it
		 * and the device is still created -- confirm intent. */
		if (!hostModuleInit()) {
			log(LOG_WARNING, "hostModuleInit error\n");
			error = EINVAL;
		}
		plex86_dev = make_dev(&plex86_cdevsw, 0 /* minor */ , UID_ROOT,
		    GID_WHEEL, 0600, "plex86");
		printf("plex86: Module loaded.\n");
		return (0);
		break;	/* unreachable after the return above */
	case MOD_UNLOAD:
		if (plex86sc.sc_open != 0)
			return (EBUSY);
		destroy_dev(plex86_dev);
		if (plex86_hashtbl != NULL) {
			free(plex86_hashtbl, M_PLEX86);
		}
		printf("plex86: Module unloaded.\n");
		break;
	default:
		error = ENXIO;
		break;
	}
	return (error);
}
MODULE_VERSION(plex86, 1);
DEV_MODULE(plex86, plex86_modevent, 0);

View File

@ -0,0 +1,800 @@
/*
* plex86: run multiple x86 operating systems concurrently
* Copyright (C) 1999-2003 Kevin P. Lawton
*
* host-linux.c: Linux specific VM host driver functionality
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "plex86.h"
#define IN_HOST_SPACE
#include "monitor.h"
#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/wrapper.h>
#include <linux/version.h>
#include <asm/irq.h>
#include <asm/atomic.h>
#ifndef VERSION_CODE
# define VERSION_CODE(vers,rel,seq) ( ((vers)<<16) | ((rel)<<8) | (seq) )
#endif
#if LINUX_VERSION_CODE < VERSION_CODE(2,4,20)
/* I use get_user_pages() to find and pin physical pages of memory
* underlying the guest physical memory malloc()'d from user space.
* This became an exported symbol available for kernel modules
* as of 2.4.20. You will have to recode some functions for
* lesser kernels.
*/
# error "Currently, you need Linux kernel 2.4.20 or above."
#endif
#if LINUX_VERSION_CODE >= VERSION_CODE(2,1,0)
# include <asm/uaccess.h>
#endif
#include <asm/io.h>
/************************************************************************/
/* Compatibility macros & convenience functions for older kernels */
/************************************************************************/
#ifndef EXPORT_NO_SYMBOLS
# define EXPORT_NO_SYMBOLS register_symtab(NULL)
#endif
#if LINUX_VERSION_CODE >= VERSION_CODE(2,1,29)
# define proc_register_dynamic proc_register
#endif
#if LINUX_VERSION_CODE < VERSION_CODE(2,2,0)
#define NEED_RESCHED need_resched
#else
#define NEED_RESCHED current->need_resched
#endif
/* Instrumentation of how many hardware interrupts were redirected
* to the host, while the VM monitor/guest was running. This can be
* written to by multiple contexts, so it needs SMP protection.
*/
static atomic_t interruptRedirCount[256];
#if LINUX_VERSION_CODE < VERSION_CODE(2,1,0)
/* Pre-2.1 compatibility shims: emulate copy_from_user/copy_to_user on
 * top of verify_area + memcpy_fromfs/tofs.
 * NOTE(review): on failure these return the verify_area() error code,
 * not the modern "bytes not copied" convention -- callers must expect
 * only zero/nonzero. */
static inline unsigned long
copy_from_user(void *to, const void *from, unsigned long n)
{
  int i;
  if ( (i = verify_area(VERIFY_READ, from, n)) != 0 )
    return i;
  memcpy_fromfs(to, from, n);
  return 0;
}
static inline unsigned long
copy_to_user(void *to, const void *from, unsigned long n)
{
  int i;
  if ( (i = verify_area(VERIFY_WRITE, to, n)) != 0 )
    return i;
  memcpy_tofs(to, from, n);
  return 0;
}
#endif
#if LINUX_VERSION_CODE >= VERSION_CODE(2,1,18) && !defined(THIS_MODULE)
/* Starting with version 2.1.18, the __this_module symbol is present,
* but the THIS_MODULE #define was introduced much later ...
*/
#define THIS_MODULE (&__this_module)
#endif
/************************************************************************/
/* Declarations */
/************************************************************************/
/* Use dynamic major number allocation. (Set non-zero for static allocation) */
#define PLEX86_MAJOR 0
static int plex_major = PLEX86_MAJOR;
#if LINUX_VERSION_CODE >= VERSION_CODE(2,1,18)
MODULE_PARM(plex_major, "i");
MODULE_PARM_DESC(plex_major, "major number (default " __MODULE_STRING(PLEX86_MAJOR) ")");
#endif
/* The kernel segment base. */
#if LINUX_VERSION_CODE < VERSION_CODE(2,1,0)
# define KERNEL_OFFSET 0xc0000000
#else
# define KERNEL_OFFSET 0x00000000
#endif
/* File operations. */
static int plex86_ioctl(struct inode *, struct file *, unsigned int,
unsigned long);
static int plex86_open(struct inode *, struct file *);
#if LINUX_VERSION_CODE >= VERSION_CODE(2,1,31)
static int plex86_release(struct inode *, struct file *);
#else
static void plex86_release(struct inode *, struct file *);
#endif
#if LINUX_VERSION_CODE >= VERSION_CODE(2,1,0)
static int plex86_mmap(struct file * file, struct vm_area_struct * vma);
#else
static int plex86_mmap(struct inode * inode, struct file * file,
struct vm_area_struct * vma);
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,9)
/* New License scheme. */
#ifdef MODULE_LICENSE
MODULE_LICENSE("GPL"); /* Close enough. Keeps kernel from complaining. */
#endif
#endif
/************************************************************************/
/* Structures / Variables */
/************************************************************************/
static int retrieveKernelModulePages(void);
static unsigned retrievePhyPages(Bit32u *page, int max_pages, void *addr,
unsigned size);
static struct file_operations plex86_fops = {
#if LINUX_VERSION_CODE >= VERSION_CODE(2,4,0)
owner: THIS_MODULE,
#endif
mmap: plex86_mmap,
ioctl: plex86_ioctl,
open: plex86_open,
release: plex86_release,
};
#ifdef CONFIG_DEVFS_FS
#include <linux/devfs_fs_kernel.h>
devfs_handle_t my_devfs_entry;
#endif
/* For the /proc/driver/plex86 entry. */
#if LINUX_VERSION_CODE >= VERSION_CODE(2,4,0) /* XXX - How far back? */
int plex86_read_procmem(char *, char **, off_t, int);
#else
int plex86_read_procmem(char *, char **, off_t, int, int);
#endif
#if LINUX_VERSION_CODE < VERSION_CODE(2,3,25)
static struct proc_dir_entry plex86_proc_entry = {
0, /* dynamic inode */
6, "driver/plex86", /* len, name */
S_IFREG | S_IRUGO, /* mode */
1, 0, 0,
0,
NULL,
&plex86_read_procmem, /* read function */
};
#endif
#if CONFIG_X86_PAE
# error "CONFIG_X86_PAE defined for this kernel, but unhandled in plex86"
#endif
/************************************************************************/
/* Main kernel module code */
/************************************************************************/
/* Kernel-module entry point.
 *
 * Registers the plex86 character device, the /proc status entry and
 * (optionally) the devfs node, then collects this module's physical
 * pages and runs the host-independent initialization.
 *
 * Returns 0 on success or a negative errno on failure; on failure every
 * registration performed so far is undone (including the devfs entry,
 * which the original code forgot to unregister on the error path).
 */
int
init_module(void)
{
  int err;

  /* Initialize structures which are not specific to each VM.  These
   * are things which are set only once upon kernel module initialization.
   */
  memset(&kernelModulePages, 0, sizeof(kernelModulePages));
  memset(&interruptRedirCount, 0, sizeof(interruptRedirCount));

  /* Register the device with the kernel. */
  err = register_chrdev(plex_major, "plex86", &plex86_fops);
  if (err < 0) {
    printk(KERN_WARNING "plex86: can't get major %d\n", plex_major);
    return(err);
  }
  /* If this was a dynamic allocation, save the major for
   * the release code.
   */
  if(!plex_major)
    plex_major = err;

  /* Register the /proc entry. */
#ifdef CONFIG_PROC_FS
#if LINUX_VERSION_CODE >= VERSION_CODE(2,3,25)
  if (!create_proc_info_entry("driver/plex86", 0, NULL, plex86_read_procmem))
    printk(KERN_ERR "plex86: registering /proc/driver/plex86 failed\n");
#else
  proc_register_dynamic(&proc_root, &plex86_proc_entry);
#endif
#endif

  /* Register /dev/misc/plex86 with devfs. */
#ifdef CONFIG_DEVFS_FS
  my_devfs_entry = devfs_register(NULL, "misc/plex86",
                                  DEVFS_FL_DEFAULT,
                                  plex_major, 0 /* minor mode*/,
                                  S_IFCHR | 0666, &plex86_fops,
                                  NULL /* "info" */);
  if (!my_devfs_entry)
    printk(KERN_ERR "plex86: registering misc/plex86 devfs entry failed\n");
#endif

  /* Retrieve the monitor physical pages. */
  if ( !retrieveKernelModulePages() ) {
    printk(KERN_ERR "plex86: retrieveKernelModulePages returned error\n");
    err = -EINVAL;
    goto fail_retrieve_pages;
  }

  /* Kernel independent code to be run when kernel module is loaded. */
  if ( !hostModuleInit() ) {
    printk(KERN_ERR "plex86: genericModuleInit returned error\n");
    err = -EINVAL;
    goto fail_cpu_capabilities;
  }

  /* Success. */
  EXPORT_NO_SYMBOLS;
  return(0);

fail_cpu_capabilities:
fail_retrieve_pages:
  /* Unregister the devfs entry (was missing on this error path). */
#ifdef CONFIG_DEVFS_FS
  if (my_devfs_entry)
    devfs_unregister(my_devfs_entry);
#endif
  /* Unregister /proc entry. */
#ifdef CONFIG_PROC_FS
#if LINUX_VERSION_CODE >= VERSION_CODE(2,3,25)
  remove_proc_entry("driver/plex86", NULL);
#else
  proc_unregister(&proc_root, plex86_proc_entry.low_ino);
#endif
#endif
  /* Unregister device. */
  unregister_chrdev(plex_major, "plex86");
  return err;
}
/* Kernel-module exit point: undoes everything init_module() registered —
 * character device, /proc entry, and devfs node. */
void
cleanup_module(void)
{
  /* Unregister device. */
  unregister_chrdev(plex_major, "plex86");
  /* Unregister /proc entry. */
#ifdef CONFIG_PROC_FS
#if LINUX_VERSION_CODE >= VERSION_CODE(2,3,25)
  remove_proc_entry("driver/plex86", NULL);
#else
  proc_unregister(&proc_root, plex86_proc_entry.low_ino);
#endif
#endif
#ifdef CONFIG_DEVFS_FS
  devfs_unregister(my_devfs_entry);
#endif
}
/************************************************************************/
/* Open / Release a VM */
/************************************************************************/
/* Device open handler: allocates and zeroes a per-open vm_t, stores it
 * in filp->private_data, and runs the host-independent open code.
 * Returns 0 on success or -ENOMEM.
 *
 * Fix: on pre-2.4 kernels the use count was incremented before the
 * allocation, but the -ENOMEM path never decremented it, permanently
 * pinning the module.  Decrement on the failure path.
 */
int
plex86_open(struct inode *inode, struct file *filp)
{
  vm_t *vm;

#if LINUX_VERSION_CODE < VERSION_CODE(2,4,0)
  MOD_INC_USE_COUNT;
#endif

  /* Allocate a VM structure. */
  if ( (vm = hostOSAllocZeroedMem(sizeof(vm_t))) == NULL ) {
#if LINUX_VERSION_CODE < VERSION_CODE(2,4,0)
    MOD_DEC_USE_COUNT;  /* undo the increment above on failure */
#endif
    return -ENOMEM;
  }
  filp->private_data = vm;

  /* Kernel independent device open code. */
  hostDeviceOpen(vm);
  return(0);
}
/* Device release handler: frees the per-open vm_t allocated in
 * plex86_open() and its VM pages.  The return type changed from void
 * to int in kernel 2.1.31, hence the conditional signature. */
#if LINUX_VERSION_CODE >= VERSION_CODE(2,1,31)
int
#else
void
#endif
plex86_release(struct inode *inode, struct file *filp)
{
  vm_t *vm = (vm_t *)filp->private_data;

  filp->private_data = NULL;
  /* Free the virtual memory. */
  hostUnallocVmPages( vm );
  /* Free the VM structure (allocated via vmalloc in hostOSAllocZeroedMem,
   * so vfree is the matching release). */
  memset( vm, 0, sizeof(*vm) );
  vfree( vm );
#if LINUX_VERSION_CODE < VERSION_CODE(2,4,0)
  MOD_DEC_USE_COUNT;
#endif
#if LINUX_VERSION_CODE >= VERSION_CODE(2,1,31)
  return(0);
#endif
}
/* Device ioctl handler: thin shim over the host-independent dispatcher.
 * Translates plex86's private error codes into the host (Linux) errno
 * namespace, keeping the negative-return convention. */
int
plex86_ioctl(struct inode *inode, struct file *filp,
             unsigned int cmd, unsigned long arg)
{
  vm_t *vm = (vm_t *)filp->private_data;
  int status;

  /* Delegate to the kernel-independent ioctl code; it calls back into
   * this module only for host-specific services. */
  status = hostIoctlGeneric(vm, inode, filp, cmd, arg);

  /* Map plex86 errno codes onto host errno codes. */
  if (status < 0)
    return( - hostOSConvertPlex86Errno(- status) );
  return( status );
}
/* Device mmap handler — not implemented; always fails with -EINVAL.
 * The 2.1.0 VFS dropped the inode parameter, hence the conditional
 * signature. */
int
#if LINUX_VERSION_CODE >= VERSION_CODE(2,1,0)
plex86_mmap(struct file * file, struct vm_area_struct * vma)
#else
plex86_mmap(struct inode * inode, struct file * file, struct vm_area_struct * vma)
#endif
{
  vm_t *vm = (vm_t *)file->private_data;
  UNUSED(vm);
  return -EINVAL;
}
/************************************************************************/
/* Status reporting: /proc code */
/************************************************************************/
/* /proc read handler: dumps the per-vector interrupt redirection
 * counters (only non-zero ones) into 'buf' and returns the number of
 * bytes written.  The incoming 'len' is ignored and reused as the
 * output length.
 * NOTE(review): no bound is checked against the size of 'buf' (one page
 * on old kernels); with many active vectors the sprintf output could
 * exceed it — verify against the proc_fs contract for this kernel range.
 */
int
plex86_read_procmem(char *buf, char **start, off_t offset,
#if LINUX_VERSION_CODE >= VERSION_CODE(2,4,0)
                    int len
#else
                    int len, int unused
#endif
                    )
{
  unsigned i;

  len = 0;
  len += sprintf(buf, "monitor-->host interrupt reflection counts\n");
  for (i=0; i<256; i++) {
    int count;
    count = atomic_read( &interruptRedirCount[i] );
    if (count)
      len += sprintf(buf+len, "  0x%2x:%10u\n", i, count);
  }
  return(len);
}
/* Locate the physical pages backing this kernel module and record them
 * in the global kernelModulePages structure.  Returns 1 on success,
 * 0 on error. */
int
retrieveKernelModulePages(void)
{
  /*
   *  Retrieve start address and size of this module.
   *
   *  Note that with old kernels, we cannot access the module info (size),
   *  hence we rely on the fact that Linux lets at least one page of
   *  virtual address space unused after the end of the module.
   */
#ifdef THIS_MODULE
  /* The struct module itself sits at the start of the module mapping. */
  Bit32u driverStartAddr = (Bit32u) THIS_MODULE;
  unsigned size = THIS_MODULE->size;
#else
  Bit32u driverStartAddr = (Bit32u) &mod_use_count_;
  unsigned size = 0;  /* Actual size determined below (by retrievePhyPages
                       * walking until an unmapped page). */
#endif
  Bit32u driverStartAddrPageAligned = driverStartAddr & ~0xfff;
  int nPages;

  if (driverStartAddr != driverStartAddrPageAligned) {
    /* Pretend this kernel module starts at the beginning of the page. */
    /* If size is known, we have to add the extra offset from the beginning
     * of the page.
     */
    if (size)
      size += (driverStartAddr & 0xfff);
  }
  nPages = retrievePhyPages(kernelModulePages.ppi, Plex86MaxKernelModulePages,
                            (void *) driverStartAddrPageAligned, size);
  if (nPages == 0) {
    printk(KERN_ERR "plex86: retrieveKernelModulePages: retrieve returned error.\n");
    return( 0 ); /* Error. */
  }
  printk(KERN_WARNING "plex86: %u monitor pages located\n", nPages);
  kernelModulePages.startOffset = driverStartAddr;
  kernelModulePages.startOffsetPageAligned = driverStartAddrPageAligned;
  kernelModulePages.nPages = nPages;
  return( 1 ); /* OK. */
}
/* Resolve the physical page numbers underlying a vmalloc()'ed (or
 * otherwise kernel-mapped) area by walking the kernel page tables.
 *
 * page       out: receives up to max_pages physical page indices
 * max_pages  capacity of 'page'
 * addr_v     page-aligned kernel virtual start address
 * size       byte length; 0 means "walk until an unmapped page" and
 *            return the page count found
 *
 * Returns the number of pages resolved, or 0 on error. */
unsigned
retrievePhyPages(Bit32u *page, int max_pages, void *addr_v, unsigned size)
{
  /*
   *  Grrr.  There doesn't seem to be an exported mechanism to retrieve
   *  the physical pages underlying a vmalloc()'ed area.  We do it the
   *  hard way ...
   */
  pageEntry_t *host_pgd;
  Bit32u host_cr3;
  Bit32u addr; // start_addr;
  unsigned n_pages;
  int i;

  addr = (Bit32u) addr_v;
  if ( addr & 0xfff ) {
    printk(KERN_ERR "plex86: retrievePhyPages: not page aligned!\n");
    return 0;
  }
  if (!addr) {
    printk(KERN_ERR "plex86: retrievePhyPages: addr NULL!\n");
    return 0;
  }
  if (size == 0) {
    /* Size unknown.  Determine by cycling through page tables until
     * we find one which is not present.  We will assume that means
     * the end of the data structure.  Set the number of pages to
     * cycle through, to one more than the maximum requested.  This
     * way we'll look through enough pages.
     */
    n_pages = max_pages + 1;
  }
  else {
    n_pages = BytesToPages(size);
    if ( n_pages > max_pages ) {
      printk(KERN_ERR "plex86: retrievePhyPages: n=%u > max=%u\n",
             n_pages, max_pages);
      return 0;
    }
  }
  /* Read CR3 to locate the host page directory. */
  asm volatile ("movl %%cr3, %0" : "=r" (host_cr3));
  host_pgd = (pageEntry_t *)(phys_to_virt(host_cr3 & ~0xfff));
  for (i = 0; i < n_pages; i++) {
    Bit32u laddr;
    unsigned long lpage;
    pgd_t *pgdPtr; pmd_t *pmdPtr; pte_t *ptePtr;
    pgd_t pgdVal;  pmd_t pmdVal;  pte_t pteVal;

    /* KERNEL_OFFSET is non-zero only on pre-2.1 kernels. */
    laddr = KERNEL_OFFSET + ((Bit32u) addr);
    lpage = VMALLOC_VMADDR(laddr);
    /* About to traverse the page tables.  We need to lock others
     * out of them briefly.  Newer Linux versions can do a fine-grained
     * lock on the page tables themselves.  Older ones have to do
     * a "big kernel lock".
     */
#if LINUX_VERSION_CODE >= VERSION_CODE(2,3,10)
    spin_lock(&init_mm.page_table_lock);
#else
    lock_kernel(); /* Big kernel lock. */
#endif
    pgdPtr = pgd_offset(&init_mm, lpage);
    pmdPtr = pmd_offset(pgdPtr, lpage);
    ptePtr = pte_offset(pmdPtr, lpage);
    /* Snapshot the entries so the lock can be dropped before use. */
    pgdVal = *pgdPtr;
    pmdVal = *pmdPtr;
    pteVal = *ptePtr;
#if LINUX_VERSION_CODE >= VERSION_CODE(2,3,10)
    spin_unlock(&init_mm.page_table_lock);
#else
    unlock_kernel(); /* Big kernel unlock. */
#endif
    /* Bit 0 of each level is the Present bit. */
    if ( !(pgdVal.pgd & 1) ||
         !(pmdVal.pmd & 1) ||
         !(pteVal.pte_low & 1) ) {
      if (size == 0)
        return i; /* Report number of pages until area ended. */
      printk(KERN_ERR "plex86: retrievePhyPages: "
             "PDE.P==0: i=%u, n=%u laddr=0x%x\n", i, n_pages, laddr);
      return 0; /* Error, ran into unmapped page in memory range. */
    }
    /* Abort if our page list is too small. */
    if (i >= max_pages) {
      printk(KERN_WARNING "plex86: page list is too small!\n");
      printk(KERN_WARNING "plex86: n_pages=%u, max_pages=%u\n",
             n_pages, max_pages);
      return 0;
    }
    /* Get physical page address for this virtual page address. */
    page[i] = pte_val(pteVal) >> 12;
    /* Increment to the next virtual page address. */
    addr += 4096;
  }
  return(n_pages);
}
/************************************************************************
* The requisite host-specific functions. An implementation of each of
* these functions needs to be offered for each host-XYZ.c file.
************************************************************************/
/* Yield the CPU while the monitor is idle.  Returns non-zero if it is
 * OK to continue, 0 if a signal is pending for the current task. */
unsigned
hostOSIdle(void)
{
  if (NEED_RESCHED)
    schedule();
  /* return !current_got_fatal_signal(); */
  return( ! signal_pending(current) );
}
void *
hostOSAllocZeroedMem(unsigned long size)
{
void *ptr;
ptr = vmalloc(size);
if ( ((Bit32u) ptr) & 0x00000fff )
return( 0 ); /* Error. */
/* Zero pages. This also demand maps the pages in, which we need
* since we'll cycle through all the pages to get the physical
* address mappings.
*/
mon_memzero(ptr, size);
return( ptr );
}
/* Free memory obtained from hostOSAllocZeroedMem() (vmalloc-backed). */
void
hostOSFreeMem(void *ptr)
{
  vfree(ptr);
}
/* Allocate one zeroed physical page; returns its kernel virtual
 * address, or NULL on failure. */
void *
hostOSAllocZeroedPage(void)
{
  return( (void *) get_zeroed_page(GFP_KERNEL) );
}
/* Free a page obtained from hostOSAllocZeroedPage(). */
void
hostOSFreePage(void *ptr)
{
  free_page( (Bit32u)ptr );
}
/* Resolve the physical page list of a vmalloc-backed area; thin wrapper
 * over retrievePhyPages().  Returns the page count, 0 on error. */
unsigned
hostOSGetAllocedMemPhyPages(Bit32u *page, int max_pages, void *ptr, unsigned size)
{
  return( retrievePhyPages(page, max_pages, ptr, size) );
}
Bit32u
hostOSGetAllocedPagePhyPage(void *ptr)
{
if (!ptr) return 0;
/* return MAP_NR(ptr); */
return(__pa(ptr) >> PAGE_SHIFT);
}
/* Host-side printf for the monitor.  Currently disabled (#if 0) pending
 * a fix; the intended implementation formats via mon_vsnprintf and logs
 * through printk.
 * NOTE(review): the disabled body lacks a va_end(args) — add one when
 * re-enabling. */
void
hostOSPrint(char *fmt, ...)
{
#warning "Fix hostPrint"
#if 0
  va_list args;
  int ret;
  unsigned char buffer[256];

  va_start(args, fmt);
  ret = mon_vsnprintf(buffer, 256, fmt, args);
  if (ret == -1) {
    printk(KERN_ERR "plex86: hostPrint: vsnprintf returns error.\n");
  }
  else {
    printk(KERN_WARNING "plex86: %s\n", buffer);
  }
#endif
}
/* Translate a plex86-internal errno code into the host (Linux) errno
 * namespace.  Unknown codes are logged and mapped to EINVAL.
 *
 * Fix: the log message for the default case named the wrong function
 * ("ioctlAllocVPhys", a copy-paste leftover); it now names this one.
 */
int
hostOSConvertPlex86Errno(unsigned ret)
{
  switch (ret) {
    case 0:                 return(0);
    case Plex86ErrnoEBUSY:  return(EBUSY);
    case Plex86ErrnoENOMEM: return(ENOMEM);
    case Plex86ErrnoEFAULT: return(EFAULT);
    case Plex86ErrnoEINVAL: return(EINVAL);
    case Plex86ErrnoEACCES: return(EACCES);
    case Plex86ErrnoEAGAIN: return(EAGAIN);
    default:
      printk(KERN_ERR "plex86: hostOSConvertPlex86Errno: case %u\n", ret);
      return(EINVAL);
  }
}
/* Report the host kernel's segment base (non-zero only on pre-2.1
 * kernels; see the KERNEL_OFFSET definition above). */
Bit32u
hostOSKernelOffset(void)
{
  return( KERNEL_OFFSET );
}
/* Force the module use count back to exactly 1 (pre-2.4 kernels only),
 * so the subsequent release can decrement it to zero and allow unload
 * even after abnormal teardown. */
void
hostOSModuleCountReset(vm_t *vm, void *inode, void *filp)
{
#if LINUX_VERSION_CODE < VERSION_CODE(2,4,0)
  while (MOD_IN_USE) {
    MOD_DEC_USE_COUNT;
  }
  MOD_INC_USE_COUNT; /* bump back to 1 so release can decrement */
#endif
}
/* Copy 'len' bytes from user space; returns the number of bytes NOT
 * copied (0 on success), per the copy_from_user contract. */
unsigned long
hostOSCopyFromUser(void *to, void *from, unsigned long len)
{
  return( copy_from_user(to, from, len) );
}
/* Copy 'len' bytes to user space; returns the number of bytes NOT
 * copied (0 on success), per the copy_to_user contract. */
unsigned long
hostOSCopyToUser(void *to, void *from, unsigned long len)
{
  return( copy_to_user(to, from, len) );
}
/* Pin one user page in memory (with write intent) and report its
 * physical page index.
 *
 * osSpecificPtr  out: receives the struct page * (needed to unpin later)
 * ppi            out: physical page index of the pinned page
 * kernelAddr     optional out: a kernel mapping of the page via kmap()
 *
 * Returns 1 on success, 0 on error. */
Bit32u
hostOSGetAndPinUserPage(vm_t *vm, Bit32u userAddr, void **osSpecificPtr,
                        Bit32u *ppi, Bit32u *kernelAddr)
{
  int ret;
  struct page **pagePtr;
  struct page  *page;

  pagePtr = (struct page **) osSpecificPtr;
  ret = get_user_pages(current,
                       current->mm,
                       (unsigned long) userAddr,
                       1, /* 1 page. */
                       1, /* 'write': intent to write. */
                       0, /* 'force': ? */
                       pagePtr,
                       NULL /* struct vm_area_struct *[] */
                       );
  if (ret != 1) {
    printk(KERN_ERR "plex86: hostGetAndPinUserPages: failed.\n");
    return(0); /* Error. */
  }
  page = *pagePtr; /* The returned "struct page *" value. */

  /* Now that we have a list of "struct page *", one for each physical
   * page of memory of the user space process's requested area, we can
   * calculate the physical page address by simple pointer arithmetic
   * based on "mem_map".
   */
  *ppi = page - mem_map;
  if (kernelAddr) {
    /* Caller wants a kernel address returned which maps to this physical
     * address.
     */
    *kernelAddr = (Bit32u) kmap( page );
#warning "FIXME: Check return value here."
#warning "Also, conditionally compile for version and high memory support."
  }
  return(1); /* OK. */
}
/* Undo hostOSGetAndPinUserPage(): drop the kmap (if one was made),
 * propagate dirtiness to the kernel, and release the page reference.
 *
 * osSpecificPtr  the struct page * saved at pin time
 * kernelAddr     non-NULL iff a kmap() mapping was created
 * dirty          non-zero if the guest wrote to the page */
void
hostOSUnpinUserPage(vm_t *vm, Bit32u userAddr, void *osSpecificPtr,
                    Bit32u ppi, Bit32u *kernelAddr, unsigned dirty)
{
#if 0
/* Here is some sample code from Linux 2.4.18, mm/memory.c:__free_pte() */
    struct page *page = pte_page(pte);
    if ((!VALID_PAGE(page)) || PageReserved(page))
        return;
    if (pte_dirty(pte))
        set_page_dirty(page);
    free_page_and_swap_cache(page);
#endif
  struct page *page;

  page = (struct page *) osSpecificPtr;

  /* If a kernel address is passed, that means that previously we created
   * a mapping for this physical page in the kernel address pace.
   * We should unmap it.  Only really useful for pages allocated from
   * high memory.
   */
  if (kernelAddr)
    kunmap(page);

  /* If the page was dirtied due to the guest running in the VM, we
   * need to tell the kernel about that since it is not aware of
   * the VM page tables.
   */
  if (dirty)
    set_page_dirty(page);

  /* Release/unpin the page. */
  put_page(page);
}
/* Bump the per-vector counter of interrupts redirected to the host
 * (atomic: may run concurrently on SMP). */
void
hostOSInstrumentIntRedirCount(unsigned interruptVector)
{
  atomic_inc( &interruptRedirCount[interruptVector] );
}

View File

@ -0,0 +1,566 @@
/*
* plex86: run multiple x86 operating systems concurrently
*
* Copyright (C) 2000 Frank van der Linden (fvdl@wasabisystems.com)
*
* host-netbsd.c: NetBSD-specific code for kernel module.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/* XXXX stuff that conflicts with NetBSD namespace */
#define timer_t __bsd_timer_t
#define write_eflags __netbsd_write_eflags
#define read_eflags __netbsd_read_eflags
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/conf.h>
#include <sys/exec.h>
#include <sys/lkm.h>
#include <sys/malloc.h>
#include <sys/null.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/signalvar.h>
#include <sys/mman.h>
#undef NETBSD_PLEX86_DEBUG
#if __NetBSD_Version__ > 105009900
#include <uvm/uvm_extern.h>
#include <uvm/uvm_param.h>
#else
#include <vm/vm.h>
#endif
#undef timer_t
#undef write_eflags
#undef read_eflags
#include "plex86.h"
#define IN_HOST_SPACE
#include "monitor.h"
/* Character-device entry points (NetBSD cdevsw interface). */
int plex86_open(dev_t dev, int oflags, int devtype, struct proc *p);
int plex86_close(dev_t dev, int cflags, int devtype, struct proc *p);
/* NOTE(review): the third parameter is named 'length' here but 'prot'
 * in the definition below — names only, but worth unifying. */
paddr_t plex86_mmap(dev_t dev, off_t offset, int length);
int plex86_ioctl(dev_t dev, u_long cmd, caddr_t data, int flags,
                 struct proc *p);

static int plex86_handle(struct lkm_table *, int);
static vm_t *find_vm(struct proc *);
static void register_vm(vm_t *, struct proc *);
static void unregister_all(void);
#if 0
static void unregister_vm(vm_t *, struct proc *);
#endif
static unsigned retrieve_phy_pages(Bit32u *, int, void *, unsigned, int);

/* Character device switch table; unimplemented ops use enodev/seltrue. */
static struct cdevsw plex86dev = {
  plex86_open, plex86_close,
  (dev_type_read((*))) enodev, (dev_type_write((*))) enodev,
  plex86_ioctl, (dev_type_stop((*))) enodev, 0,
  seltrue, plex86_mmap, 0
};

/* Driver soft state: currently just an open count. */
static struct plex86_softc {
  int sc_open;
} plex86sc;

MOD_DEV("plex86", LM_DT_CHAR, -1, &plex86dev)

monitor_pages_t monitor_pages;

/*
 * Hash table stuff to maintain proc <-> vm mapping.
 * 23 entries should be plenty.. unless someone plans to run more than
 * 23 guest OSs..
 *
 * Note that a process can only open the device once with this scheme.
 */
LIST_HEAD(plex86_hashhead, plex86_vmentry);
struct plex86_vmentry {
  pid_t vm_pid;                         /* owning process id */
  vm_t *vm_vm;                          /* its VM state */
  LIST_ENTRY(plex86_vmentry) vm_entry;  /* hash-chain linkage */
};
struct plex86_hashhead *plex86_hashtbl;
u_long plex86_hashmask;
#define PLEX86_VMHASHSIZE 23
#define PLEX86_VMHASH(p) ((u_long)((p)->p_pid) & plex86_hashmask)
/* LKM entry point: route load/unload/stat to plex86_handle(). */
int
plex86_lkmentry(struct lkm_table *lkmtp, int cmd, int ver)
{
  DISPATCH(lkmtp, cmd, ver, plex86_handle, plex86_handle, plex86_handle)
}
/* LKM event handler.  On load: record the module's own pages in
 * monitor_pages, build the pid->vm hash table, and run the generic
 * module init.  On unload: refuse while the device is open, then free
 * the hash table.  Returns 0 or an errno. */
static int
plex86_handle(struct lkm_table *lkmtp, int cmd)
{
  int error = 0;

  switch (cmd) {
  case LKM_E_LOAD:
    if (lkmexists(lkmtp))
      return EEXIST;
    monitor_pages.startOffset = lkmtp->area;
    monitor_pages.startOffsetPageAligned =
        monitor_pages.startOffset & 0xfffff000;
    monitor_pages.n_pages = lkmtp->size / PAGE_SIZE;
    if (retrieve_phy_pages(monitor_pages.page,
        PLEX86_MAX_MONITOR_PAGES, (void *)lkmtp->area,
        lkmtp->size, 0) == 0) {
      log(LOG_WARNING, "plex86: could not store physical "
          "addresses for monitor pages\n");
      return EIO;
    }
    /* hashinit()'s signature changed around NetBSD 1.5A. */
#if __NetBSD_Version__ > 105009900
    plex86_hashtbl = hashinit(PLEX86_VMHASHSIZE, HASH_LIST,
        M_DEVBUF, M_WAITOK, &plex86_hashmask);
#else
    plex86_hashtbl = hashinit(PLEX86_VMHASHSIZE, M_DEVBUF,
        M_WAITOK, &plex86_hashmask);
#endif
    if (!hostModuleInit()) {
      log(LOG_WARNING, "hostModuleInit error\n");
      error = EINVAL;
    }
    break;
  case LKM_E_UNLOAD:
    if (plex86sc.sc_open != 0)
      return EBUSY;
    free(plex86_hashtbl, M_DEVBUF);
    break;
  case LKM_E_STAT:
    break;
  default:
    error = EIO;
    break;
  }
  return error;
}
/* Device open (NetBSD): root only; allocates a fresh vm_t for this
 * process unless it already has one (one VM per process). */
int
plex86_open(dev_t dev, int oflags, int devtype, struct proc *p)
{
  vm_t *vm;

  if (suser(p->p_ucred, &p->p_acflag) != 0)
    return EPERM;

  vm = find_vm(p);
  if (vm == NULL) {
    vm = malloc(sizeof (vm_t), M_DEVBUF, M_WAITOK);
    if (vm == NULL)
      return EIO;
    memset(vm, 0, sizeof(vm_t));
    register_vm(vm, p);
    plex86sc.sc_open++;
  } else
    return EBUSY;

  /* Kernel independent device open code. */
  hostDeviceOpenInit(vm);
#ifdef NETBSD_PLEX86_DEBUG
  printf("plex86: pid %u opened device, vm %p\n", p->p_pid, vm);
#endif
  return 0;
}
/* Device close (NetBSD): tears down EVERY registered VM, not just this
 * process's, and resets the open count. */
int
plex86_close(dev_t dev, int cflags, int devtype, struct proc *p)
{
  unregister_all();
  plex86sc.sc_open = 0;
#ifdef NETBSD_PLEX86_DEBUG
  printf("plex86: pid %u closed device\n", p->p_pid);
#endif
  return 0;
}
/* Device mmap (NetBSD): translate a byte offset into the physical page
 * backing the guest's memory (or, past the guest area with PROT_READ,
 * the log buffer).  Returns the physical page address, or -1 on a
 * range error.
 * NOTE(review): the early `return ENXIO` returns a positive errno where
 * the mmap convention elsewhere in this function is -1 for failure —
 * verify against the NetBSD d_mmap contract. */
paddr_t
plex86_mmap(dev_t dev, off_t offset, int prot)
{
  struct proc *p = curproc;
  vm_t *vm;
  int page;
  off_t endguestoff;

  vm = find_vm(p);
  if (vm == NULL)
    return ENXIO;

#if 1
#warning "kludge to mmap message buffer"
  /* Offsets beyond guest memory, mapped read-only, hit the log buffer. */
  endguestoff = (off_t)(vm->pages.guest_n_megs * 1024 * 1024);
  if (offset >= endguestoff && prot == PROT_READ) {
    page = (offset - endguestoff) / PAGE_SIZE;
    return vm->pages.log_buffer[page];
  }
#endif
  page = offset / PAGE_SIZE;
  if (page < 0 || page > vm->pages.guest_n_pages) {
    log(LOG_WARNING, "plex86: mmap: offset %lx out of range\n",
        (unsigned long)offset);
    return -1;
  }
  return vm->pages.guest[page];
}
/* Device ioctl (NetBSD): dispatch on the plex86 command set.  'data'
 * points at the copied-in argument per the BSD ioctl convention.
 * Returns 0 on success or a POSITIVE errno.
 *
 * Fix: the PLEX86_FORCE_INT case returned -EINVAL, which is the Linux
 * convention; every other error path in this function (and the BSD
 * ioctl contract) uses positive errno values.  Return EINVAL instead.
 */
int
plex86_ioctl(dev_t dev, u_long cmd, caddr_t data, int flags,
    struct proc *p)
{
  int error;
  vm_t *vm;

  vm = find_vm(p);
  if (vm == NULL)
    return EINVAL;

  switch (cmd) {
  case PLEX86_ALLOCVPHYS:
    {
    unsigned long arg = *((unsigned long*)data);
    guest_cpu_t guest_cpu;

    if (vm->mon_state != MON_STATE_UNINITIALIZED ||
        vm->pages.guest_n_megs != 0)
      return EBUSY;
    printf("plex86_ioctl: ALLOCVPHYS: requested size %lu\n",
        arg);
    /* Size must be 4..PLEX86_MAX_PHY_MEGS megabytes, multiple of 4. */
    if (arg > PLEX86_MAX_PHY_MEGS || arg < 4 || (arg & ~0x3) != arg)
      return EINVAL;
    /* Allocate memory */
    error = allocVMPages(vm, arg);
    if (error != 0) {
      log(LOG_WARNING, "plex86: allocVMPages failed (%d)\n",
          error);
      return ENOMEM;
    }
    if (init_guest_phy_mem(vm) != 0) {
      log(LOG_ERR, "plex86: init_guest_phy_mem failed\n");
      unallocVMPages(vm);
      return EFAULT;
    }
    getCpuResetValues(&guest_cpu);
    log(LOG_WARNING, "plex86: cpu.cr0 = 0x%x\n", guest_cpu.cr0);
    if (!init_monitor(vm, 0, 0, &guest_cpu) ||
        !setGuestCPU(vm, 0, &guest_cpu) ||
        !mapMonitor(vm, guest_cpu.eflags, 0)) {
      log(LOG_ERR, "plex86: init_monitor failed\n");
      unallocVMPages(vm);
      return EFAULT;
    }
    break;
    }
  case PLEX86_TEARDOWN:
    unallocVMPages(vm);
    break;
  case PLEX86_ALLOCINT:
    return EINVAL;
  case PLEX86_RELEASEINT:
    return EINVAL;
  case PLEX86_PRESCANDEPTH:
    {
    unsigned long arg = *(unsigned long *)data;
    if ((arg < PrescanDepthMin) || (arg > PrescanDepthMax)) {
      log(LOG_WARNING, "plex86: Requested prescan depth %lu"
          " out of range [%u..%u]\n", arg, PrescanDepthMin,
          PrescanDepthMax);
      return EINVAL;
    }
    vm->prescanDepth = (unsigned)arg;
    break;
    }
  case PLEX86_SETINTR:
    ioctlSetIntr(vm, *(unsigned long *)data);
    break;
  case PLEX86_SET_A20:
    {
    unsigned long arg = *(unsigned long *)data;
    if (!ioctlSetA20E(vm, arg))
      return EINVAL;
    break;
    }
  case PLEX86_MESSAGEQ:
    {
    vm_messages_t msg;
    if (vm->mon_state != MON_STATE_RUNNABLE)
      return EINVAL;
    /* 'data' holds a user pointer to the message; copy the header
     * first to learn the payload length, then the payload. */
    error = copyin(*(void **)data, &msg.header, sizeof msg.header);
    if (error != 0)
      return error;
    if ((msg.header.msg_len + sizeof(msg.header)) > sizeof(msg))
      return EINVAL;
    if (msg.header.msg_len != 0) {
      error = copyin(&((vm_messages_t *)*(void **)data)->msg,
          &msg.msg, msg.header.msg_len);
      if (error != 0)
        return error;
    }
#warning "deal with LDT %gs and %fs that the NetBSD kernel uses"
    /* XXXX */
    __asm("movl $0, %eax");
    __asm("movl %eax, %gs");
    __asm("movl %eax, %fs");
    if (ioctlMessageQ(vm, &msg)) {
      log(LOG_WARNING, "plex86: ioctlMessageQ failed\n");
      return EINVAL;
    }
    error = copyout(&msg, *(void **)data,
        sizeof (msg.header) + msg.header.msg_len);
    return error;
    }
  case PLEX86_RESET:
    break;
  case PLEX86_PHYMEM_MOD:
    break;
  case PLEX86_FORCE_INT:
    if (vm->mon_state != MON_STATE_RUNNABLE)
      return EINVAL;  /* was -EINVAL: BSD ioctls return positive errno */
    /* NOTE(review): this truncates the 'data' POINTER to an int; the
     * other cases dereference data.  Looks like it should be
     * *(unsigned *)data — verify against the Linux counterpart. */
    vm->dbg_force_int = 0x100 | (unsigned)data;
    break;
  case PLEX86_PRESCANRING3:
    {
    unsigned long arg = *(unsigned long *)data;
    if (arg > PrescanRing3On) {
      log(LOG_WARNING,
          "plex86: Requested PrescanRing3 val(%lu) OOB\n",
          arg);
      return EINVAL;
    }
    vm->prescanRing3 = arg;
    break;
    }
  case PLEX86_GENERIC:
    return 0;
  default:
    log(LOG_WARNING, "plex86: unknown ioctl %lx\n", cmd);
    return EINVAL;
  }
  return 0;
}
/* Record the pid -> vm mapping in the hash table so later device calls
 * from the same process can find their VM.
 *
 * Fix: the DIAGNOSTIC block did not compile — it referenced a bare
 * `p_pid` (should be `p->p_pid`) and a nonexistent field `p->pid` in
 * the panic call.  Both corrected to `p->p_pid`.
 */
static void
register_vm(vm_t *vm, struct proc *p)
{
  struct plex86_hashhead *php;
  struct plex86_vmentry *vhp;

  php = &plex86_hashtbl[PLEX86_VMHASH(p)];
#ifdef DIAGNOSTIC
  /* A process may only ever hold one VM; panic on a duplicate. */
  for (vhp = php->lh_first; vhp != NULL; vhp = vhp->vm_entry.le_next) {
    if (vhp->vm_pid == p->p_pid)
      panic("plex86: vm already registered, pid %u\n",
          p->p_pid);
  }
#endif
  vhp = malloc(sizeof (struct plex86_vmentry), M_DEVBUF, M_WAITOK);
  vhp->vm_pid = p->p_pid;
  vhp->vm_vm = vm;
  LIST_INSERT_HEAD(php, vhp, vm_entry);
}
#if 0
/* Remove and free a single pid -> vm mapping (currently unused; close
 * tears everything down via unregister_all instead). */
static void
unregister_vm(vm_t *vm, struct proc *p)
{
  struct plex86_hashhead *php;
  struct plex86_vmentry *vhp;

  php = &plex86_hashtbl[PLEX86_VMHASH(p)];
  for (vhp = php->lh_first; vhp != NULL; vhp = vhp->vm_entry.le_next) {
    if (vhp->vm_pid == p->p_pid) {
      LIST_REMOVE(vhp, vm_entry);
      free(vhp->vm_vm, M_DEVBUF);
      free(vhp, M_DEVBUF);
      break;
    }
  }
}
#endif
/* Free every registered pid -> vm mapping across all hash buckets.
 *
 * Fix: the original loop increment read `vhp->vm_entry.le_next` AFTER
 * `free(vhp, ...)` — a use-after-free.  Cache the next link before
 * removing and freeing the entry.
 */
static void
unregister_all(void)
{
  int i;
  struct plex86_hashhead *php;
  struct plex86_vmentry *vhp, *next;

  for (i = 0; i < PLEX86_VMHASHSIZE; i++) {
    php = &plex86_hashtbl[i];
    for (vhp = php->lh_first; vhp != NULL; vhp = next) {
      /* Grab the successor before vhp is freed below. */
      next = vhp->vm_entry.le_next;
#ifdef NETBSD_PLEX86_DEBUG
      printf("plex86: unregister vm %p, pid %u\n",
          vhp->vm_vm, vhp->vm_pid);
#endif
      LIST_REMOVE(vhp, vm_entry);
      free(vhp->vm_vm, M_DEVBUF);
      free(vhp, M_DEVBUF);
    }
  }
}
/* Look up the VM registered for process 'p', or NULL if it has none. */
static vm_t *
find_vm(struct proc *p)
{
  struct plex86_hashhead *bucket = &plex86_hashtbl[PLEX86_VMHASH(p)];
  struct plex86_vmentry *entry;

  /* Walk the hash chain of the bucket this pid maps to. */
  for (entry = bucket->lh_first; entry != NULL;
       entry = entry->vm_entry.le_next) {
    if (entry->vm_pid == p->p_pid)
      return entry->vm_vm;
  }
  return NULL;
}
/* Fill 'page' with the physical page numbers backing a kernel virtual
 * range via vtophys().
 *
 * aligned  non-zero: addr_v must already be page aligned (error if not);
 *          zero: round addr_v down to its page.
 *
 * Returns the page count, or 0 on error. */
static unsigned
retrieve_phy_pages(Bit32u *page, int max_pages, void *addr_v, unsigned size,
    int aligned)
{
  Bit32u start_addr;
  unsigned n_pages, i;

  if (!aligned)
    start_addr = (Bit32u)addr_v & ~(PAGE_SIZE-1);
  else {
    start_addr = (Bit32u)addr_v;
    if (start_addr & (PAGE_SIZE -1)) {
      log(LOG_WARNING, "plex86: retrieve_phy_pages: address "
          "%p not aligned\n", addr_v);
      return 0;
    }
  }
  n_pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
  if (n_pages > max_pages) {
    log(LOG_WARNING, "plex86: retrieve_phy_pages: page list "
        "too small\n");
    return 0;
  }
  for (i = 0; i < n_pages; i++) {
    page[i] = vtophys((vaddr_t)start_addr) / PAGE_SIZE;
    start_addr += PAGE_SIZE;
  }
  return n_pages;
}
/* Yield the CPU while idle; returns non-zero if no signal is pending
 * for the current process. */
unsigned
host_idle(void)
{
  if (want_resched)
    yield();
  return (CURSIG(curproc) == 0);
}
/* Allocate kernel memory for the monitor. */
void *
host_alloc(unsigned long size)
{
  /*
   * XXX - it wants this page-aligned apparently.
   * (Requests up to half a page are rounded to a full page so the
   * allocator returns a page-aligned block.)
   */
  if (size <= (PAGE_SIZE / 2))
    size = PAGE_SIZE;
  return malloc(size, M_DEVBUF, M_WAITOK);
}
/* Free memory obtained from host_alloc(). */
void
host_free(void *ptr)
{
  free(ptr, M_DEVBUF);
}
/* Resolve the physical pages of an already page-aligned kernel area;
 * returns the page count or 0 on error. */
unsigned
host_map(Bit32u *page, int max_pages, void *ptr, unsigned size)
{
  return retrieve_phy_pages(page, max_pages, ptr, size, 1);
}
/* Allocate one page of kernel memory (not zeroed). */
void *
host_alloc_page(void)
{
  return malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
}
/* Free a page obtained from host_alloc_page().
 *
 * Fix: the original `return free(...)` returned a void expression from
 * a void function, which is a C constraint violation (accepted only as
 * a compiler extension).  Call free() as a plain statement.
 */
void
host_free_page(void *ptr)
{
  free(ptr, M_DEVBUF);
}
Bit32u
host_map_page(void *ptr)
{
Bit32u u;
if (ptr == NULL)
return 0;
u = vtophys(ptr) / PAGE_SIZE;
/* printf("host_map_page(%p) -> %x\n", ptr, u); */
return u;
}
/* printf-style logging for the monitor: format via mon_vsnprintf and
 * emit through the kernel log.
 *
 * Fix: va_start() was never paired with va_end(), which is undefined
 * behavior per C99 7.15.1.3.  Add the matching va_end().
 */
void
hostprint(char *fmt, ...)
{
  va_list args;
  int ret;
  unsigned char buffer[256];

  va_start(args, fmt);
  ret = mon_vsnprintf(buffer, 256, fmt, args);
  va_end(args);
  if (ret == -1)
    log(LOG_WARNING,
        "plex86: hostprint: vsnprintf returns error.\n");
  else
    log(LOG_WARNING, "plex86: %s\n", buffer);
}

View File

@ -0,0 +1,161 @@
/*
* plex86: run multiple x86 operating systems concurrently
* Copyright (C) 1999-2003 Kevin P. Lawton
*
* host-null.c: host OS specific stubs. These provide a reference for
* ports of plex86 to various host OSes.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "plex86.h"
#define IN_HOST_SPACE
#include "monitor.h"
/* Note: for comments on what various functions are expected to do, as
* well as a reference implemntation, read the 'host-linux.c' file.
* It's likely the most up-to-date.
*/
/* OS specific includes here. */
/* Some declarations for the entry points etc here. */
/* Module-scope record of this module's own physical pages (normally
 * filled in by the host layer; unused in this stub host). */
kernelModulePages_t kernelModulePages;
/* Freestanding build: no libc headers, so define NULL locally. */
#define NULL 0
/* Stub driver for the "null" host port: exercises the three generic
 * entry points once with a NULL vm so the reference port links. */
int
main(int argc, char *argv[])
{
  vm_t *vm = NULL;
  hostModuleInit();
  hostDeviceOpen(vm);
  hostIoctlGeneric(vm, NULL, NULL, 0, 0);
  return(0);
}
/* Stub: null host reserves no physical pages. */
void
hostOSReservePhyPages(vm_t *vm, Bit32u *hostPhyPages, unsigned nPages)
{
}
/* Stub: nothing to unreserve on the null host. */
void
hostOSUnreservePhyPages(vm_t *vm, Bit32u *hostPhyPages, unsigned nPages)
{
}
unsigned
hostOSIdle(void)
{
return 0;
}
/* Stub: allocation always fails (returns NULL) on the null host. */
void *
hostOSAllocZeroedMem(unsigned long size)
{
  return 0;
}
/* Stub: nothing to free on the null host. */
void
hostOSFreeMem(void *ptr)
{
}
/* Stub: page allocation always fails (returns NULL) on the null host. */
void *
hostOSAllocZeroedPage(void)
{
  return 0;
}
/* Stub: nothing to free on the null host. */
void
hostOSFreePage(void *ptr)
{
}
/* Stub: no physical pages to report (returns 0). */
unsigned
hostOSGetAllocedMemPhyPages(Bit32u *page, int max_pages, void *ptr, unsigned size)
{
  return 0;
}
/* Stub: no physical page to report (returns 0). */
Bit32u
hostOSGetAllocedPagePhyPage(void *ptr)
{
  return 0;
}
/* Stub: output is discarded on the null host. */
void
hostOSPrint(char *fmt, ...)
{
}
/* Stub: every plex86 errno maps to 0 (success) on the null host. */
int
hostOSConvertPlex86Errno(unsigned ret)
{
  return 0;
}
/* Stub: the null host has no kernel segment offset. */
Bit32u
hostOSKernelOffset(void)
{
  return 0;
}
/* Stub: no module use count to reset on the null host. */
void
hostOSModuleCountReset(vm_t *vm, void *inode, void *filp)
{
}
/* Stub: reports 0 bytes not copied (the copy_from_user convention). */
unsigned long
hostOSCopyFromUser(void *to, void *from, unsigned long len)
{
  return 0;
}
/* Stub: reports 0 bytes not copied (the copy_to_user convention). */
unsigned long
hostOSCopyToUser(void *to, void *from, unsigned long len)
{
  return 0;
}
/* Stub: pinning always fails (returns 0) on the null host. */
Bit32u
hostOSGetAndPinUserPage(vm_t *vm, Bit32u userAddr, void **osSpecificPtr,
                        Bit32u *ppi, Bit32u *kernelAddr)
{
  return 0;
}
/* Stub: nothing to unpin on the null host. */
void
hostOSUnpinUserPage(vm_t *vm, Bit32u userAddr, void *osSpecificPtr,
                    Bit32u ppi, Bit32u *kernelAddr, unsigned dirty)
{
}
/* Stub: interrupt redirection is not instrumented on the null host. */
void
hostOSInstrumentIntRedirCount(unsigned interruptVector)
{
}

View File

@ -0,0 +1,66 @@
/*
* plex86: run multiple x86 operating systems concurrently
* Copyright (C) 1999-2003 Kevin P. Lawton
*
* descriptor2.h: defines for descriptors and selectors
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __DESCRIPTOR2_H__
#define __DESCRIPTOR2_H__

/* x86 gate descriptor (interrupt/trap gate) as an 8-byte packed
 * bitfield overlay. */
typedef struct
{
  Bit16u offset_low;       /* handler offset, bits 15..0 */
  selector_t selector;     /* target code-segment selector */
  unsigned count:5;        /* parameter count (call gates only) */
  unsigned RESERVED:3;
  unsigned type:5;         /* gate type; bit 3 is the D (32-bit) flag */
  unsigned dpl:2;          /* descriptor privilege level */
  unsigned p:1;            /* present bit */
  Bit16u offset_high;      /* handler offset, bits 31..16 */
  } __attribute__ ((packed)) gate_t;

/* Initialize 'd' as an interrupt gate: selector S, handler offset O,
 * present P, privilege DPL, size flag D (1 = 32-bit).
 * NOTE(review): neither macro initializes d.count — presumably callers
 * zero the descriptor first; verify at the call sites. */
#define SET_INT_GATE(d, S,O,P,DPL, D) {\
  d.selector = (S);\
  d.offset_high = (O) >> 16;\
  d.offset_low  = (O) & 0xffff;\
  d.RESERVED = 0;\
  d.type = ((D)<<3) | 0x6;\
  d.dpl = (DPL);\
  d.p = (P);\
  }

/* Same as SET_INT_GATE but builds a trap gate (type 0x7): the IF flag
 * is not cleared on entry. */
#define SET_TRAP_GATE(d, S,O,P,DPL, D) {\
  d.selector = (S);\
  d.offset_high = (O) >> 16;\
  d.offset_low  = (O) & 0xffff;\
  d.RESERVED = 0;\
  d.type = ((D)<<3) | 0x7;\
  d.dpl = (DPL);\
  d.p = (P);\
  }

/* Operand of a far jmp/call: 32-bit offset followed by a selector.
 * (Spelled __attribute__ for consistency with gate_t above; the bare
 * __attribute spelling is a GCC alias with identical meaning.) */
typedef struct
{
  Bit32u offset;
  Bit16u selector;
  } __attribute__ ((packed)) far_jmp_info_t;

#endif  /* __DESCRIPTOR2_H__ */

View File

@ -0,0 +1,78 @@
/*
* plex86: run multiple x86 operating systems concurrently
* Copyright (C) 1999-2003 Kevin P. Lawton
*
* eflags.h: Bitfields of EFLAGS registers
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __EFLAGS_H__
#define __EFLAGS_H__
/*
* the eflags field looks like this:
* bit: 0 1 2 3 4 5 6 7 8 9 A B C/D E F 10 11 12 13 14 15 16
* flg: CF 1 PF 0 AF 0 ZF SF TF IF DF OF IOPL NT 0 RF VM AC VIF VIP ID 0
*/
#define FLG_CF (1<<0)
#define FLG_PF (1<<2)
#define FLG_AF (1<<4)
#define FLG_ZF (1<<6)
#define FLG_SF (1<<7)
#define FLG_TF (1<<8)
#define FLG_IF (1<<9)
#define FLG_DF (1<<10)
#define FLG_OF (1<<11)
#define FLG_IOPL (3<<12)
#define FLG_NT (1<<14)
#define FLG_RF (1<<16)
#define FLG_VM (1<<17)
#define FLG_AC (1<<18)
#define FLG_VIF (1<<19)
#define FLG_VIP (1<<20)
#define FLG_ID (1<<21)
/* EFLAGS register, accessible either as the raw 32-bit value or as
 * individual bit fields (see the bit table in the comment above). */
typedef union {
  struct {
    Bit8u cf:1;        /* carry flag */
    Bit8u R1:1;        /* reserved, reads as 1 */
    Bit8u pf:1;        /* parity flag */
    Bit8u R3:1;        /* reserved */
    Bit8u af:1;        /* auxiliary carry flag */
    Bit8u R5:1;        /* reserved */
    Bit8u zf:1;        /* zero flag */
    Bit8u sf:1;        /* sign flag */
    Bit8u tf:1;        /* trap flag */
    Bit8u if_:1;       /* interrupt enable ('if' is a C keyword, hence 'if_') */
    Bit8u df:1;        /* direction flag */
    Bit8u of:1;        /* overflow flag */
    Bit8u iopl:2;      /* I/O privilege level */
    Bit8u nt:1;        /* nested task */
    Bit8u R15:1;       /* reserved */
    Bit8u rf:1;        /* resume flag */
    Bit8u vm:1;        /* virtual-8086 mode */
    Bit8u ac:1;        /* alignment check */
    Bit8u vif:1;       /* virtual interrupt flag */
    Bit8u vip:1;       /* virtual interrupt pending */
    Bit8u id:1;        /* ID flag (CPUID availability) */
    Bit16u R31_22:10;  /* reserved bits 31..22 */
  } __attribute__ ((packed)) fields;
  Bit32u raw;          /* the whole register as one 32-bit value */
} __attribute__ ((packed)) eflags_t;
#endif /* __EFLAGS_H__ */

View File

@ -0,0 +1,59 @@
/*
* plex86: run multiple x86 operating systems concurrently
* Copyright (C) 1999-2003 Kevin P. Lawton
*
* guest_context.h: monitor stack frame after exception/interrupt
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __GUEST_CONTEXT_H__
#define __GUEST_CONTEXT_H__
#include "eflags.h"
/* This is the guest context (from ring3) pushed on the monitor stack (ring0)
* during an exception/interrupt. Part is pushed automatically by the
* CPU, part by the interrupt handling code.
*
* Values are pushed starting with the end of this structure, towards
* the beginning, since stack pushes descend in address.
*/
/* Guest (ring3) CPU context as it appears on the monitor (ring0) stack
 * after an exception/interrupt.  Per the comment above, the structure
 * is filled from the end towards the beginning: the CPU pushes the
 * tail (eip..ss), the IDT stubs and handler push the rest. */
typedef struct {
  Bit32u gs;          /* segment registers, pushed by the handler code */
  Bit32u fs;
  Bit32u ds;
  Bit32u es;
  Bit32u edi;         /* general registers (pushal order in memory) */
  Bit32u esi;
  Bit32u ebp;
  Bit32u dummy_esp;   /* ESP slot written by pushal; not the guest ESP (see esp below) */
  Bit32u ebx;
  Bit32u edx;
  Bit32u ecx;
  Bit32u eax;
  Bit32u vector;      /* interrupt/exception vector, pushed by the IDT stub */
  Bit32u error;       /* error code (real, or dummy pushed by the stub) */
  Bit32u eip;         /* from here down: pushed automatically by the CPU */
  Bit32u cs;
  eflags_t eflags;
  Bit32u esp;         /* guest ring3 stack pointer */
  Bit32u ss;
} guest_context_t;
#endif /* __GUEST_CONTEXT_H__ */

View File

@ -0,0 +1,709 @@
/*
* plex86: run multiple x86 operating systems concurrently
* Copyright (C) 1999-2003 Kevin P. Lawton
*
* monitor.h: main VM monitor defines
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __MONITOR_H__
#define __MONITOR_H__
#if defined(__NetBSD__) || defined(__FreeBSD__)
#include <machine/stdarg.h>
#else
#include <stdarg.h>
#endif
#include "descriptor.h"
#include "descriptor2.h"
#include "tss.h"
#include "paging.h"
#include "eflags.h"
#include "guest_context.h"
#ifndef UNUSED
# define UNUSED(x) ((void)(x))
#endif
/* Method1: push event info (CPU pushes error code before) */
typedef struct
{
Bit8u pushl; /* Always 0x68 == pushl */
Bit32u vector; /* Interrupt vector number */
Bit8u jmp; /* Always 0xe9 == jmp */
Bit32u reloc; /* Relative offset of destination */
} __attribute__ ((packed)) idt_method1_t;
/* Method2: push a dummy error first, then event info */
typedef struct
{
Bit8u pushla; /* Always 0x68 == pushl */
Bit32u dummy; /* Dummy error code */
Bit8u pushlb; /* Always 0x68 == pushl */
Bit32u vector; /* Interrupt vector number */
Bit8u jmp; /* Always 0xe9 == jmp */
Bit32u reloc; /* Relative offset of destination */
} __attribute__ ((packed)) idt_method2_t;
typedef union
{
idt_method1_t m1;
idt_method2_t m2;
} idt_stub_t;
/* Nexus fields. This C structure maps to identical assembly */
/* fields in nexus.S. Make sure to update both! These fields */
/* are accessible to the nexus code during the transition from */
/* host<->guest and are stored in a single page. */
/* NOTE: the layout of this struct must match, field for field, the
 * assembly labels following __nexus_start in nexus.S; always update
 * both together (see the comment block above). */
typedef struct {
  /* guest pointer to vm_t structure. */
  void *vm;
  /* These fields are only used by the transition code. */
  /* They hold all info necessary to switch back to the host. */
  gdt_info_t host_gdt_info;
  gdt_info_t host_idt_info;
  far_jmp_info_t host_jmp_info;
  far_jmp_info_t host_stack_info;
  Bit16u host_ldt_sel;
  Bit16u host_tss_sel;
  Bit32u host_cr0;
  Bit32u host_cr2;
  Bit32u host_cr3;
  Bit32u host_cr4;
  /* These fields are filled by the host-side code, and used */
  /* by the transition code. They contain all info necessary */
  /* to switch to the monitor/guest address space. */
  /* This info changes whenever the monitor migrates. */
  gdt_info_t mon_gdt_info;
  gdt_info_t mon_idt_info;
  far_jmp_info_t mon_jmp_info;
  far_jmp_info_t mon_stack_info;
  Bit16u mon_ldt_sel;
  Bit16u mon_tss_sel;
  Bit32u mon_base;      /* monitor CS/DS base address (see MON_BASE_FROM_LADDR) */
  Bit32u mon_cr0;
  Bit32u mon_cr3;
  Bit32u mon_cr4;
  Bit32u mon_eflags;
  /* These fields contain info used by the transition code to */
  /* create the temporary identity mapping. They never change. */
  pageEntry_t transition_pde;
  pageEntry_t *transition_pde_p_host;
  pageEntry_t *transition_pde_p_mon;
  Bit32u transition_laddr;
} __attribute__ ((packed)) nexus_t;
/* For reference, the following describes where bits from the guest */
/* eflags register are stored/managed. */
/* */
/* Key: */
/* g: Flag value as requested by guest */
/* V: Virtualized flag value, as loaded in eflags when guest is executing */
/* ?: Unhandled yet, request of set bit causes panic for now */
/* */
/* === ======= ====== ======= ======= ======= */
/* |I|V|V|A|V|R|0|N|IO|O|D|I|T|S|Z|0|A|0|P|1|C| flag */
/* |D|I|I|C|M|F| |T|PL|F|F|F|F|F|F| |F| |F| |F| */
/* | |P|F| | | | | | | | | | | | | | | | | | | */
/* |g|?|?|g|V|g|g|g|VV|g|g|V|g|g|g|g|g|g|g|g|g| context->eflags */
/* | |?|?| |g| | | |gg| | |g| | | | | | | | | | veflags */
/* #define VirtualizedEflags 0x001a3200 */
#define VirtualizedEflags 0x001a3300
/* I define the 'nexus' as the set of data structures which */
/* must exist in the current linear guest address space. The */
/* host linear address space is not available while the current */
/* guest code is running, since we are using a completely */
/* different set of page mappings for the guest. However, */
/* at some point an exception/interrupt will occur. The */
/* interrupt mechanisms require that several structures exist in */
/* the current linear address space in order to service such */
/* an event. These data structures make up part of our VM, */
/* a thin layer which exists in the guest. Following is a */
/* list of what data structures compose this 'nexus': */
/* */
/* - IDT (max 2048 bytes) */
/* - GDT (max 65536 bytes) */
/* - LDT (max 65536 bytes) */
/* - TSS (max 8328 = 104 + 32 int redir + 8192 I/O permissions) */
/* - kernel stack page */
/* - transition code (host <--> guest) */
/* - interrupt handler stubs */
/* - Page Tables; PDE & PTE pages. */
/*
* Sizes of various nexus data structures used by the monitor
*/
#define PLEX86_MAX_PHY_MEGS 32
#define PAGESIZE 4096
#define IDT_STUB_SIZE 15
#define BytesToPages(b) ( ((b)+4095) >> 12 )
#define MON_IDT_SIZE (8*256)
#define MON_GDT_SIZE (8*512)
#define MON_LDT_SIZE (8*1)
#define MON_IDT_STUBS_SIZE (IDT_STUB_SIZE*256)
#define MON_TSS_SIZE (104)
#define MON_IDT_PAGES BytesToPages(MON_IDT_SIZE)
#define MON_GDT_PAGES BytesToPages(MON_GDT_SIZE)
#define MON_LDT_PAGES BytesToPages(MON_LDT_SIZE)
#define MON_IDT_STUBS_PAGES BytesToPages(MON_IDT_STUBS_SIZE)
#define MON_TSS_PAGES BytesToPages(MON_TSS_SIZE)
#define MAX_MON_GUEST_PAGES (PLEX86_MAX_PHY_MEGS * 256)
/* +++ MON_PAGE_TABLES is kind of random */
#define MON_PAGE_TABLES (10*((PLEX86_MAX_PHY_MEGS+3) >> 2))
#define MAX_VM_STRUCT_PAGES (68)
#define LOG_BUFF_PAGES 1
#define LOG_BUFF_SIZE ((LOG_BUFF_PAGES)*4096)
/*
* Pages allocated for the VM by the host kernel driver.
* N Megs of physical memory are allocated, per the user's
* request, for the guest OS/application code.
* Additionally, some other overhead pages are allocated
* for structures such as the page directory, page tables,
* and other virtualized facilities.
*/
typedef struct {
/* requested size of the guest[] array in megs and pages */
unsigned guest_n_megs;
unsigned guest_n_pages;
unsigned guest_n_bytes;
/* pages comprising the vm_t struct itself. */
Bit32u vm[MAX_VM_STRUCT_PAGES];
/* for the monitor's page directory */
Bit32u page_dir;
/* for the monitor's page table */
Bit32u page_tbl[MON_PAGE_TABLES];
/* Map of the linear addresses of page tables currently */
/* mapped into the monitor space. */
Bit32u page_tbl_laddr_map;
/* for the extra page table that maps our nexus code and structures */
Bit32u nexus_page_tbl;
/* For the CPU state passed between user and kernel/monitor space. */
Bit32u guest_cpu;
void *guest_cpu_hostOSPtr;
/* We need a Page Table for identity mapping the transition code */
/* between host and monitor spaces. */
Bit32u transition_PT;
Bit32u log_buffer[LOG_BUFF_PAGES];
void *log_buffer_hostOSPtr[LOG_BUFF_PAGES];
/* Physical addresses of host pages which comprise the actual */
/* monitor structures. These will be mapped into the current */
/* guest task's linear address space as well. */
Bit32u nexus;
Bit32u idt[MON_IDT_PAGES];
Bit32u gdt[MON_GDT_PAGES];
Bit32u ldt[MON_LDT_PAGES];
Bit32u tss[MON_TSS_PAGES];
Bit32u idt_stubs[MON_IDT_STUBS_PAGES];
} vm_pages_t;
typedef struct {
pageEntry_t *page_dir;
page_t *page_tbl;
unsigned *page_tbl_laddr_map;
page_t *nexus_page_tbl;
guest_cpu_t *guest_cpu;
page_t *transition_PT;
unsigned char *log_buffer;
Bit8u *code_phy_page; /* only use in mon space */
Bit8u *tmp_phy_page0; /* only use in mon space */
Bit8u *tmp_phy_page1; /* only use in mon space */
nexus_t *nexus;
/* Pointer into the monitor stack, so we can easily retrieve the */
/* stack snapshot upon interrupt/exception. */
guest_context_t *guest_context;
gate_t *idt;
descriptor_t *gdt;
descriptor_t *ldt;
tss_t *tss;
idt_stub_t *idt_stubs;
} vm_addr_t;
/* These bits define the possible usage and attributes assigned */
/* to a particular guest physical page. These are useful for keeping */
/* track of what kinds of system structures are contained in a page */
/* at a given time, and if the page has associated cached code */
/* information in the prescan logic. We can also tag particular */
/* pages with other more static attributes. */
/* Per guest-physical-page usage/attribute bits (see the comment above).
 * The PagePerm* values and PageUsage* masks below correspond to these
 * fields: e.g. PageUsagePTbl (0x010) is the 'ptbl' bit. */
typedef union {
  struct {
    Bit32u access_perm:2;     /* PagePermRW / PagePermRO / PagePermEmulate (see below) */
    Bit32u lmap_count:2;      /* NOTE(review): linear-mapping count? not defined in this header -- confirm */
    Bit32u ptbl:1;            /* page table */
    Bit32u pdir:1;            /* page directory */
    Bit32u spare0:1;          /* (spare) */
    Bit32u memMapIO:1;        /* MemMapIO */
    Bit32u RO:1;              /* RO */
    Bit32u allocated:1;       /* Allocated */
    Bit32u pinned:1;          /* Pinned by host OS. */
    Bit32u spare1:1;          /* (spare) */
    Bit32u laddr_backlink:20; /* 1st unvirtualized laddr backlink */
  } __attribute__ ((packed)) fields;
  Bit32u raw;                 /* all bits as one word; the PageUsage* masks apply here */
} __attribute__ ((packed)) phy_page_attr_t;
/* Bookkeeping record for one guest physical page. */
typedef struct {
  phy_page_attr_t attr;  /* usage/attribute bits, see phy_page_attr_t above */
  Bit64u tsc; /* for comparing to CR3 timestamp counter */
  Bit32u hostPPI;        /* physical page index of the backing host page (phy addr >> 12) */
} __attribute__ ((packed)) phyPageInfo_t;
/* Possible values of the access_perm field above. */
#define PagePermRW 0
#define PagePermRO 1
#define PagePermEmulate 2
#define PagePermNA PagePermEmulate /* No Access is synomym */
/* Bitmasks to access fields in structure above. */
#define PageUsagePTbl 0x010
#define PageUsagePDir 0x020
#define PageUsageMemMapIO 0x080
#define PageUsageRO 0x100
#define PageUsageAllocated 0x200
#define PageUsageSwappable 0x400
/* Group of attributes which retain their value, even when CR3 */
/* is reloaded and the page mappings are flushed. */
#define PageUsageSticky \
( PageUsageMemMapIO | PageUsageRO | \
PageUsageAllocated | PageUsageSwappable )
/* Group of attributes which are not compatible with a Page Table */
/* occupying a physical page. */
#define PageBadUsage4PTbl \
( PageUsagePDir | PageUsageMemMapIO | PageUsageRO )
/* Group of attributes which are not compatible with a Page Directory */
/* occupying a physical page. Keep in mind, when the PDir is marked, */
/* no other dynamic bits will be set. */
#define PageBadUsage4PDir \
( PageUsageMemMapIO | PageUsageRO )
#define PageUsageCausesNA \
( PageUsagePTbl | PageUsagePDir | PageUsageMemMapIO )
#define PageUsageCausesRO \
( PageUsageRO )
#define PDEUnhandled 0x000001d8
#define PTEUnhandled 0x00000198
#define ExceptionDE 0 /* Divide Error (fault) */
#define ExceptionDB 1 /* Debug (fault/trap) */
#define ExceptionBP 3 /* Breakpoint (trap) */
#define ExceptionOF 4 /* Overflow (trap) */
#define ExceptionBR 5 /* BOUND (fault) */
#define ExceptionUD 6
#define ExceptionNM 7
#define ExceptionDF 8
#define ExceptionTS 10
#define ExceptionNP 11
#define ExceptionSS 12
#define ExceptionGP 13
#define ExceptionPF 14
#define ExceptionMF 16
#define ExceptionAC 17
#define CR0_PE (1<<0)
#define CR0_MP (1<<1)
#define CR0_EM (1<<2)
#define CR0_TS (1<<3)
#define CR0_ET (1<<4)
#define CR0_NE (1<<5)
#define CR0_WP (1<<16)
#define CR0_AM (1<<18)
#define CR0_NW (1<<29)
#define CR0_CD (1<<30)
#define CR0_PG (1<<31)
/*
* Complete state of the VM (Virtual Machine).
*/
typedef struct {
Bit32u guestPhyMemAddr; /* Ptr to malloced memory from user space. */
/* Store eflags values of the guest which are virtualized to
* run in the monitor
*/
eflags_t veflags;
unsigned executeMethod;
unsigned vmState;
unsigned mon_request;
unsigned guestFaultNo;
Bit32u pinReqPPI;
unsigned redirect_vector;
Bit32u kernel_offset;
#define MonitorSpace 0
#define UserSpace 1
#define HostSpace 2
volatile unsigned inMonFault;
/* Extra info on aborts, especially when a message can't
* be printed out
*/
unsigned abort_code;
struct {
Bit64u t0; /* TSC before excecution of guest code */
Bit64u cyclesElapsed; /* Cycles of guest execution */
unsigned a20Enable; /* A20 line enabled? */
Bit32u a20AddrMask; /* mask to apply to phy address */
Bit32u a20IndexMask; /* mask to apply to phy address */
} system;
cpuid_info_t guestCPUIDInfo;
/* This macro yields a physical address after applying the A20 line
* enable mask to the original physical address.
*/
#define A20Addr(vm, paddr) ( (paddr) & ((vm)->system.a20AddrMask) )
#define A20PageIndex(vm, pi) ( (pi) & ((vm)->system.a20IndexMask) )
/* Keep an index of the next available Page Table */
unsigned ptbl_laddr_map_i;
Bit32u mon_pde_mask; /* Upper 10 bits of monitor lin addr space */
Bit32u mon_pdi; /* Same value shifted down 22 bits. */
Bit64u vpaging_tsc; /* time stamp of last page mappings flush */
/* We need to keep track of what each of the guest's physical */
/* pages contains, and maintain some additional attributes. */
/* We determine which kinds of information reside in the page, */
/* dynamically. */
phyPageInfo_t pageInfo[MAX_MON_GUEST_PAGES];
/* This is a hack for now. I need to store the "struct page *"
* information returned by get_user_pages() in the Linux kernel.
* Should clean this up.
*/
void *hostStructPagePtr[MAX_MON_GUEST_PAGES];
/* A revolving queue, which stores information on guest physical memory
* pages which are currently pinned. Only a certain number of pages
* may be pinned at any one time. This is a really simplistic
* strategy - when the Q is full, the page which was pinned the
* longest time ago is unpinned to make room. It's a
* "least recently pinned" strategy.
*/
#define MaxPhyPagesPinned 1024 /* 4Megs of pinned pages max per VM. */
struct {
unsigned nEntries; /* Number of entries in table. */
unsigned tail;
Bit32u ppi[MaxPhyPagesPinned]; /* Physical Page Index of pinned guest page. */
} guestPhyPagePinQueue;
struct {
volatile unsigned event; /* Any log event occurred. */
/* Inactive, OK to dump to host and change */
volatile unsigned locked;
/* Number of times buffer wrapped since last print to kernel */
/* debug facility */
volatile unsigned offset; /* Current index within buffer */
volatile unsigned error; /* Error printing. (ex. string too long) */
} log_buffer_info;
vm_pages_t pages; /* memory pages allocated by the host */
/* Host specific fields. These fields should NOT be accessed */
/* from code which may execute in either host or monitor/guest */
/* spaces, unless you need to _specifically_ manipulate a */
/* host-specific field. */
struct {
vm_addr_t addr; /* addresses of data structures in host space */
void (*__host2mon)(void); /* Host to guest nexus entry point */
pageEntry_t nexus_pde; /* PDE pointing to nexus page table */
} host;
/* Guest specific fields. These fields should NOT be accessed */
/* from code which may execute in either host or monitor/guest */
/* spaces, unless you need to _specifically_ manipulate a */
/* guest-specific field. */
struct {
vm_addr_t addr; /* addresses of data structures in guest space */
void (*__mon2host)(void); /* monitor to host entry point */
} guest;
} vm_t;
extern char __nexus_start, __nexus_end, __mon_cs;
extern char __host2mon, __mon2host, __handle_fault, __handle_int;
extern char __ret_to_guest;
/*
* This structure describes the pages containing the code/data
* of the monitor itself (inside the kernel module)
*/
#define Plex86MaxKernelModulePages 128
typedef struct {
/* Virtual address space occupied by the kernel module. */
Bit32u startOffset;
Bit32u startOffsetPageAligned;
unsigned nPages; /* Number of pages. */
/* A list of the Physical Page Indeces of the pages comprising the
* kernel module. A PPI is just the physical page address >> 12.
*/
Bit32u ppi[Plex86MaxKernelModulePages];
} kernelModulePages_t;
extern kernelModulePages_t kernelModulePages;
extern cpuid_info_t hostCpuIDInfo;
#if !defined(IN_HOST_SPACE) && !defined(IN_MONITOR_SPACE)
#error "No space defined for this file"
#endif
#if defined(IN_HOST_SPACE) || defined(IN_MONITOR_SPACE)
void mon_memzero(void *ptr, int size);
void mon_memcpy(void *dst, void *src, int size);
void *mon_memset(void *s, unsigned c, unsigned n);
/*
* We need to set the monitor CS/DS base address so that the module pages,
* which are mapped starting at linear address 'laddr' into the guest address
* space, reside at the same offset relative to the monitor CS base as they
* reside relative to the kernel CS base in the host address space. This way,
* we can execute the (non-relocatable) module code within the guest address
* space ...
*/
#define MON_BASE_FROM_LADDR(laddr) \
((laddr) - kernelModulePages.startOffsetPageAligned)
/* ============================================================
* These are the functions which are available in either of the
* host or monitor/guest spaces.
*/
/* Access to label offsets in nexus.S... From the host address perspective */
#define HOST_NEXUS_OFFSET(vm, field) \
( ((Bit32u)vm->host.addr.nexus) + \
(((Bit32u) &field) - ((Bit32u) &__nexus_start)) )
/* From the monitor/guest address perspective. */
#define MON_NEXUS_OFFSET(vm, field) \
( ((Bit32u)vm->guest.addr.nexus) + \
(((Bit32u) &field) - ((Bit32u) &__nexus_start)) )
/* Read the CPU time stamp counter via RDTSC.
 * The "=A" constraint returns the 64-bit EDX:EAX pair; this is only
 * correct on 32-bit x86 ("=A" does not mean EDX:EAX on x86-64).
 * NOTE(review): assumes a 32-bit build, consistent with the rest of
 * the monitor code. */
static __inline__ Bit64u
vm_rdtsc(void) {
  Bit64u ret;
  asm volatile (
    "rdtsc"
    : "=A" (ret)
  );
  return ret;
}
#endif /* {HOST, MONITOR} */
#ifdef IN_HOST_SPACE
/* ==========================================================
* These are the functions which are available to the monitor
* running in the host space.
*/
/*
* Generate a software interrupt
*/
#define soft_int(n) \
asm volatile ( \
" movb %b0, __soft_int_vector \n\t" \
" jmp __soft_int_n \n\t" \
"__soft_int_n: \n\t" \
" sti \n\t" \
" .byte 0xcd \n\t" \
"__soft_int_vector: \n\t" \
" .byte 0x00 \n\t" \
: \
: "r" ((Bit8u) (n) ) \
: "memory" \
)
#define Plex86ErrnoEBUSY 1
#define Plex86ErrnoENOMEM 2
#define Plex86ErrnoEFAULT 3
#define Plex86ErrnoEINVAL 4
#define Plex86ErrnoEACCES 5
#define Plex86ErrnoEAGAIN 6
#define vm_save_flags(x) \
asm volatile("pushfl ; popl %0": "=g" (x): :"memory")
#define vm_restore_flags(x) \
asm volatile("pushl %0 ; popfl": :"g" (x): "memory", "cc")
int hostInitMonitor(vm_t *);
unsigned hostMapMonitor(vm_t *);
unsigned hostInitGuestPhyMem(vm_t *);
void hostUnallocVmPages(vm_t *);
int hostAllocVmPages(vm_t *, plex86IoctlRegisterMem_t *registerMsg);
void hostInitShadowPaging(vm_t *vm);
void hostDeviceOpen(vm_t *);
unsigned hostModuleInit(void);
unsigned hostGetCpuCapabilities(void);
int hostIoctlGeneric(vm_t *vm, void *inode, void *filp,
unsigned int cmd, unsigned long arg);
int hostIoctlExecute(vm_t *vm, plex86IoctlExecute_t *executeMsg);
int hostIoctlRegisterMem(vm_t *vm, plex86IoctlRegisterMem_t *registerMsg);
void hostCopyGuestStateToUserSpace(vm_t *vm);
void hostReleasePinnedUserPages(vm_t *vm);
unsigned hostHandlePagePinRequest(vm_t *vm, Bit32u reqPPI);
/* These are the functions that the host-OS-specific file of the
* plex86 device driver must define.
*/
unsigned hostOSIdle(void);
void *hostOSAllocZeroedMem(unsigned long size);
void hostOSFreeMem(void *ptr);
void *hostOSAllocZeroedPage(void);
void hostOSFreePage(void *ptr);
unsigned hostOSGetAllocedMemPhyPages(Bit32u *page, int max_pages, void *ptr,
unsigned size);
Bit32u hostOSGetAndPinUserPage(vm_t *vm, Bit32u userAddr, void **osSpecificPtr,
Bit32u *ppi, Bit32u *kernelAddr);
void hostOSUnpinUserPage(vm_t *vm, Bit32u userAddr, void *osSpecificPtr,
Bit32u ppi, Bit32u *kernelAddr, unsigned dirty);
Bit32u hostOSGetAllocedPagePhyPage(void *ptr);
void hostOSPrint(char *fmt, ...);
Bit32u hostOSKernelOffset(void);
int hostOSConvertPlex86Errno(unsigned ret);
void hostOSModuleCountReset(vm_t *vm, void *inode, void *filp);
void hostOSInstrumentIntRedirCount(unsigned interruptVector);
unsigned long hostOSCopyFromUser(void *to, void *from, unsigned long len);
unsigned long hostOSCopyToUser(void *to, void *from, unsigned long len);
#endif /* HOST Space */
#ifdef IN_MONITOR_SPACE
/* ==========================================================
* These are the functions which are available to the monitor
* running in the monitor/guest space.
*/
void sysFlushPrintBuf(vm_t *);
void sysRemapMonitor(vm_t *);
int monprint(vm_t *, char *fmt, ...);
int mon_vsnprintf(char *str, unsigned size, const char *fmt,
va_list args);
void resetPrintBuf(vm_t *);
/* Translate from guest laddr to monitor laddr. */
#define Guest2Monitor(vm, laddr) ( ((Bit32u) (laddr)) - \
vm->guest.addr.nexus->mon_base )
void monpanic(vm_t *, char *fmt, ...) __attribute__ ((noreturn));
void monpanic_nomess(vm_t *);
void toHostGuestFault(vm_t *, unsigned fault);
void toHostPinUserPage(vm_t *, Bit32u ppi);
void guestPageFault(vm_t *, guest_context_t *context, Bit32u cr2);
void *open_guest_phy_page(vm_t *, Bit32u ppage_index, Bit8u *mon_offset);
void close_guest_phy_page(vm_t *, Bit32u ppage_index);
#define MapLinOK 0
#define MapLinMonConflict 1
#define MapLinAlreadyMapped 2
#define MapLinPPageOOB 3
#define MapLinException 4
#define MapLinEmulate 5
unsigned mapGuestLinAddr(vm_t *, Bit32u guest_laddr,
Bit32u *guest_ppage_index, unsigned us,
unsigned rw, Bit32u attr, Bit32u *error);
unsigned addPageAttributes(vm_t *, Bit32u ppi, Bit32u attr);
phyPageInfo_t *getPageUsage(vm_t *, Bit32u ppage_index);
void virtualize_lconstruct(vm_t *, Bit32u l0, Bit32u l1, unsigned perm);
unsigned getMonPTi(vm_t *, unsigned pdi, unsigned source);
#define invlpg_mon_offset(mon_offset) \
asm volatile ("invlpg (%0)": :"r" (mon_offset): "memory")
/* For now nothing, but we should conditionally compile in code
* to panic when the expression is not true.
*/
#define VM_ASSERT(vm, expression) \
if ( !(expression) ) \
monpanic(vm, "Assertion (%s) failed at %s:%u", \
#expression, __FILE__, __LINE__)
#define CLI() asm volatile ("cli": : : "memory")
#define STI() asm volatile ("sti": : : "memory")
#endif /* MONITOR Space. */
#endif /* __MONITOR_H__ */

View File

@ -0,0 +1,51 @@
/*
* plex86: run multiple x86 operating systems concurrently
* Copyright (C) 1999-2003 Kevin P. Lawton
*
* paging.h: defines for x86 paging structures
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __PAGING_H__
#define __PAGING_H__
#define PG_D 0x00000040
#define PG_A 0x00000020
/* Page Directory/Table format */
/* x86 page directory/table entry (32-bit, non-PAE format), accessible
 * as the raw word or as individual hardware bit fields. */
typedef union {
  Bit32u raw;        /* the entire entry as one 32-bit value */
  struct {
    Bit32u P:1;      /* present */
    Bit32u RW:1;     /* writable */
    Bit32u US:1;     /* user/supervisor */
    Bit32u PWT:1;    /* page-level write-through */
    Bit32u PCD:1;    /* page-level cache disable */
    Bit32u A:1;      /* accessed (PG_A above) */
    Bit32u D:1;      /* dirty (PG_D above) */
    Bit32u PS:1;     /* page size (meaningful in a PDE) */
    Bit32u G:1;      /* global */
    Bit32u avail:3;  /* available to software */
    Bit32u base:20;  /* physical base address >> 12 */
  } __attribute__ ((packed)) fields;
} __attribute__ ((packed)) pageEntry_t;
/* One 4KB page, viewable either as raw bytes or as 1024 page entries. */
typedef union {
  Bit8u bytes[4096];
  pageEntry_t pte[1024];
} page_t;
#endif /* __PAGING_H__ */

View File

@ -0,0 +1,50 @@
/*
* plex86: run multiple x86 operating systems concurrently
* Copyright (C) 1999-2003 Kevin P. Lawton
*
* tss.h: defines for x86 hardware tasking structures
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __TSS_H__
#define __TSS_H__
/* 32-bit x86 hardware Task State Segment, in the exact layout the CPU
 * expects; packed to 104 bytes (MON_TSS_SIZE in monitor.h). */
typedef struct
{
  Bit16u back, RESERVED0;     /* Backlink */
  Bit32u esp0;                /* The CK stack pointer */
  Bit16u ss0, RESERVED1;      /* The CK stack selector */
  Bit32u esp1;                /* The parent KL stack pointer */
  Bit16u ss1, RESERVED2;      /* The parent KL stack selector */
  Bit32u esp2;                /* Unused */
  Bit16u ss2, RESERVED3;      /* Unused */
  Bit32u cr3;                 /* The page directory pointer */
  Bit32u eip;                 /* The instruction pointer */
  Bit32u eflags;              /* The flags */
  Bit32u eax, ecx, edx, ebx;  /* The general purpose registers */
  Bit32u esp, ebp, esi, edi;  /* The special purpose registers */
  Bit16u es, RESERVED4;       /* The extra selector */
  Bit16u cs, RESERVED5;       /* The code selector */
  Bit16u ss, RESERVED6;       /* The application stack selector */
  Bit16u ds, RESERVED7;       /* The data selector */
  Bit16u fs, RESERVED8;       /* And another extra selector */
  Bit16u gs, RESERVED9;       /* ... and another one */
  Bit16u ldt, RESERVED10;     /* The local descriptor table */
  Bit16u trap;                /* The trap flag (for debugging) */
  Bit16u io;                  /* The I/O Map base address */
} __attribute__ ((packed)) tss_t;
#endif /* __TSS_H__ */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,63 @@
/*
* plex86: run multiple x86 operating systems concurrently
* Copyright (C) 1999-2003 Kevin P. Lawton
*
* system-mon.c: The 'motherboard' logic which connects the entire
* PC system.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "plex86.h"
#define IN_MONITOR_SPACE
#include "monitor.h"
/* Switch to the host so it can drain the monitor's print/log buffer.
 * Interrupts are masked across the world switch; execution resumes
 * here (and re-enables interrupts) after the host services the
 * request.  Statement order is significant. */
void
sysFlushPrintBuf(vm_t *vm)
{
  CLI();
  vm->mon_request = MonReqFlushPrintBuf;
  vm->guest.__mon2host();
  STI();
}
/* Switch to the host to request that the monitor be remapped to a new
 * location in the guest linear address space.  Interrupts are masked
 * across the world switch; statement order is significant. */
void
sysRemapMonitor(vm_t *vm)
{
  CLI();
  vm->mon_request = MonReqRemapMonitor;
  vm->guest.__mon2host();
  STI();
}
/* Report a guest fault to the host side.  The fault number is stored
 * in vm->guestFaultNo before the world switch; interrupts are masked
 * across the transition and re-enabled on return. */
void
toHostGuestFault(vm_t *vm, unsigned fault)
{
  CLI();
  vm->mon_request = MonReqGuestFault;
  vm->guestFaultNo = fault;
  vm->guest.__mon2host();
  STI();
}
/* Ask the host to pin the guest physical page with index 'ppi' (the
 * host OS must pin user pages before the monitor can map them).  The
 * requested PPI is stored in vm->pinReqPPI before the world switch;
 * interrupts are masked across the transition. */
void
toHostPinUserPage(vm_t *vm, Bit32u ppi)
{
  CLI();
  vm->mon_request = MonReqPinUserPage;
  vm->pinReqPPI = ppi;
  vm->guest.__mon2host();
  STI();
}

326
bochs/plex86/kernel/nexus.S Normal file
View File

@ -0,0 +1,326 @@
/*
* plex86: run multiple x86 operating systems concurrently
* Copyright (C) 1999-2001 Kevin P. Lawton
*
* nexus.S: code to transition between host and monitor/guest
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
.text
/* This module consists of relocatable code and data necessary to
* effect transitions between the host <--> guest. This information
* is purposely stored in this single page, so that we have access
* to it during our transitions between the monitor interrupt handler,
* and our host.
*
* I coded the relevant parts to use completely relocatable
* accesses to the following fields. This is necessary, so that
* we can float this code page anywhere in the monitor's linear
* address space.
*/
/* ===============================================================
* NOTE: If you modify ANY of the following fields, you must also
* update the corresponding entries in the C typedef 'nexus_t'.
* That construct is used from C land to access values in this
* relocatable page.
*/
.globl __nexus_start
__nexus_start:
/* Reserved storage for the saved host and monitor contexts.  Every
 * field below is pure data (.skip), so this page doubles as a small
 * per-VM record shared between host and monitor transitions.  The
 * layout MUST stay in sync with the C typedef 'nexus_t' (see the NOTE
 * above). */
__vm: ;.skip 4, 0
__host_gdt_info: ;.skip 6, 0
__host_idt_info: ;.skip 6, 0
__host_jmp_info: ;.skip 6, 0
__host_stack_info: ;.skip 6, 0
__host_ldt_sel: ;.skip 2, 0
__host_tss_sel: ;.skip 2, 0
__host_cr0: ;.skip 4, 0
__host_cr2: ;.skip 4, 0
__host_cr3: ;.skip 4, 0
__host_cr4: ;.skip 4, 0
__mon_gdt_info: ;.skip 6, 0
__mon_idt_info: ;.skip 6, 0
__mon_jmp_info: ;.skip 6, 0
__mon_stack_info: ;.skip 6, 0
__mon_ldt_sel: ;.skip 2, 0
__mon_tss_sel: ;.skip 2, 0
__mon_base: ;.skip 4, 0
__mon_cr0: ;.skip 4, 0
__mon_cr3: ;.skip 4, 0
__mon_cr4: ;.skip 4, 0
__mon_eflags: ;.skip 4, 0
__transition_pde: ;.skip 4, 0
__transition_pde_p_host: ;.skip 4, 0
__transition_pde_p_mon: ;.skip 4, 0
__transition_laddr: ;.skip 4, 0
/* ===============================================================
 * End NOTE.
 */
/* OFFSET_OF(field) yields the byte offset of 'field' from the start of
 * this page, so all data accesses below can be made relative to a base
 * register (EBX) and remain position-independent. */
#define OFFSET_OF(field) [field - __nexus_start]
/* These are the offsets of the structures above, from the */
/* beginning of this section. */
#define HOST_GDT_INFO OFFSET_OF(__host_gdt_info)
#define HOST_IDT_INFO OFFSET_OF(__host_idt_info)
#define HOST_JMP_INFO OFFSET_OF(__host_jmp_info)
#define HOST_STACK_INFO OFFSET_OF(__host_stack_info)
#define HOST_LDT_SEL OFFSET_OF(__host_ldt_sel)
#define HOST_TSS_SEL OFFSET_OF(__host_tss_sel)
#define HOST_CR0 OFFSET_OF(__host_cr0)
#define HOST_CR2 OFFSET_OF(__host_cr2)
#define HOST_CR3 OFFSET_OF(__host_cr3)
#define HOST_CR4 OFFSET_OF(__host_cr4)
#define MON_GDT_INFO OFFSET_OF(__mon_gdt_info)
#define MON_IDT_INFO OFFSET_OF(__mon_idt_info)
#define MON_JMP_INFO OFFSET_OF(__mon_jmp_info)
#define MON_STACK_INFO OFFSET_OF(__mon_stack_info)
#define MON_LDT_SEL OFFSET_OF(__mon_ldt_sel)
#define MON_TSS_SEL OFFSET_OF(__mon_tss_sel)
#define MON_CR0 OFFSET_OF(__mon_cr0)
#define MON_CR3 OFFSET_OF(__mon_cr3)
#define MON_CR4 OFFSET_OF(__mon_cr4)
#define MON_BASE OFFSET_OF(__mon_base)
#define TRANSITION_PDE OFFSET_OF(__transition_pde)
#define TRANSITION_PDE_P_HOST OFFSET_OF(__transition_pde_p_host)
#define TRANSITION_PDE_P_MON OFFSET_OF(__transition_pde_p_mon)
#define TRANSITION_LADDR OFFSET_OF(__transition_laddr)
/* To make this code page and data accesses to the fields above */
/* relocatable, I use the following conventions. I load EBX with */
/* a pointer to the beginning of this page, to be used with an */
/* access through the CS: segment. We can easily get the */
/* current EIP with a call/pop EBX, so the combination of CS:EBX, */
/* accesses this page no matter where it is located. */
/* ================================================================== */
.globl __host2mon /* Start function __host2mon() */
/* __host2mon: switch the CPU from the host context into the monitor
 * context.  Host state is pushed on the host stack and stored into
 * this nexus page; then monitor CR3/CR0/CR4, GDT/IDT/LDT, stack and
 * CS are installed and control resumes at __mon_cs.  The matching
 * restore path is __mon2host/__host_cs below. */
__host2mon:
  /* Save host context first, so it can be restored later */
  pushfl /* Save host flags */
  pushal /* Save host general regs */
  pushl %es /* Save host segments */
  pushl %ds
  pushl %fs
  pushl %gs
  /* Put EIP of beginning of this section in EBX to be used to */
  /* access data. */
  call null_call
null_call:
  popl %ebx
  subl $(OFFSET_OF(null_call)), %ebx
  /* Create identity mapping of this page into the monitor context */
  movl (TRANSITION_PDE_P_HOST)(%ebx), %eax
  movl (TRANSITION_PDE)(%ebx), %ebp
  xchgl %ebp, (%eax) /* old PDE saved in %ebp to be restored below */
  /* Save host GDT, LDT, IDT, and TSS */
  sgdt (HOST_GDT_INFO)(%ebx)
  sidt (HOST_IDT_INFO)(%ebx)
  sldt (HOST_LDT_SEL)(%ebx)
  str (HOST_TSS_SEL)(%ebx)
  movl %esp, (HOST_STACK_INFO)(%ebx) /* Save host SS:ESP */
  movw %ss, (4+HOST_STACK_INFO)(%ebx) /* for later restore */
  leal (OFFSET_OF(__host_cs))(%ebx), %eax /* Save the CS:EIP for monitor to */
  movl %eax, (HOST_JMP_INFO)(%ebx) /* jump to when reloading host CS. */
  movw %cs, (4+HOST_JMP_INFO)(%ebx) /* See __host_cs in __mon2host below. */
  /* Save host CRx values */
  movl %cr0, %eax
  movl %cr2, %ecx
  movl %cr4, %edx
  movl %cr3, %esi
  movl %eax, (HOST_CR0)(%ebx)
  movl %ecx, (HOST_CR2)(%ebx)
  movl %edx, (HOST_CR4)(%ebx)
  movl %esi, (HOST_CR3)(%ebx)
  /* Compute monitor CRx values */
  movl (MON_CR0)(%ebx), %eax
  movl (MON_CR4)(%ebx), %edx
  movl (MON_CR3)(%ebx), %esi
  /* Before changing the PSE bit in CR4, we have to switch over */
  /* to the new CR3 (this page identity mapped anyways). Otherwise */
  /* the processor could flush the TLB, and reload the entry for */
  /* this page, only to find it's marked with a 4Meg Page, but we */
  /* have that support turned off, before we actually */
  /* reloaded CR3! */
  movl %esi, %cr3 /* Set monitor CR3 */
  movl %eax, %cr0 /* Set monitor CR0 */
  movl %edx, %cr4 /* Set monitor CR4 */
  movl %esi, %cr3 /* Set monitor CR3 (again, after CR0/CR4; flushes TLB) */
  jmp null_jmp0 /* serialize after the mode switch */
null_jmp0:
  /* Switch to monitor GDT, LDT, and IDT */
  lgdt (MON_GDT_INFO)(%ebx)
  lidt (MON_IDT_INFO)(%ebx)
  lldt (MON_LDT_SEL)(%ebx)
  /* Switch to monitor stack and CS */
  /* and jump to the monitor-side nexus page */
  lss (MON_STACK_INFO)(%ebx), %esp
  ljmp (MON_JMP_INFO)(%ebx)
.globl __mon_cs
__mon_cs:
  /* Reset DS:EBX to point to the monitor-side nexus page */
  movw %ss, %ax
  movw %ax, %ds /* copy SS to DS */
  movw %ax, %es /* copy SS to ES */
  movl %esp, %ebx
  andl $0xfffff000, %ebx /* monitor stack sits in the nexus page: round down to its base */
  /* Clear busy bit of the monitor TSS and switch to it */
  movzwl (MON_TSS_SEL)(%ebx), %eax
  andl $0xfffffff8, %eax /* strip RPL/TI bits to get the descriptor offset */
  addl (MON_GDT_INFO+2)(%ebx), %eax /* add GDT base (linear) */
  subl (MON_BASE)(%ebx), %eax /* convert linear to monitor-segment offset */
  andl $0xfffffdff, 4(%eax) /* clear the TSS-descriptor busy bit (bit 9 of 2nd dword) */
  ltr (MON_TSS_SEL)(%ebx)
  /* We no longer need the nexus page identity mapped. Fix the mapping */
  /* back to the way it should be, in case guest code uses it. */
  movl (TRANSITION_PDE_P_MON)(%ebx), %eax
  movl %ebp, (%eax) /* %ebp still contains the original value */
  movl (TRANSITION_LADDR)(%ebx), %eax
  invlpg (%eax) /* Tell TLB about the change */
  /* +++ xxx fix this, need to convert pure laddr to offset */
  movl %cr3, %eax /* +++ xxx */
  movl %eax, %cr3 /* +++ xxx */
  /* */
  /* We can now restore the monitor context from it's stack. */
  /* */
  popl %gs
  popl %fs
  popal /* Restore mon general registers */
  popfl /* Restore mon eflags */
  ret /* Resume execution in monitor exception handler code. */
/* ================================================================== */
.globl __mon2host /* Start function __mon2host() */
/* __mon2host: the reverse of __host2mon.  Saves the monitor context
 * on the monitor stack, identity-maps this nexus page into the host
 * address space, restores host CRx/descriptor-table/stack state, and
 * returns into the host code that originally called __host2mon. */
__mon2host:
  pushfl /* Save mon flags */
  pushal /* Save mon general registers */
  pushl %fs
  pushl %gs
  /* Set EBX to point to this nexus page */
  movl %esp, %ebx
  andl $0xfffff000, %ebx /* stack lives in the nexus page: round down to its base */
  movl %esp, (MON_STACK_INFO)(%ebx) /* Save mon ESP */
  /* Identity map this code page to host address space. */
  movl (TRANSITION_PDE_P_MON)(%ebx), %eax
  movl (TRANSITION_PDE)(%ebx), %ebp
  xchgl %ebp, (%eax) /* old PDE saved in %ebp to be restored below */
  movl (TRANSITION_LADDR)(%ebx), %eax
  /* Switch EBX to point to the identity mapped copy of */
  /* the nexus page, and jump to the copy of this code there. */
  subl (MON_BASE)(%ebx), %eax
  invlpg (%eax) /* Tell TLB about the change */
  movl %eax, %ebx
  leal (OFFSET_OF(__mon_nexus_jmp))(%ebx), %eax
  jmp *%eax
__mon_nexus_jmp:
  /* We are still in the monitor context, but are running at the */
  /* same CS.base+EIP location in either host or monitor context, */
  /* and this page is identity mapped between the 2 contexts. */
  /* We can now switch to the host CR3, and be sure that execution */
  /* will resume at the next instruction. */
  /* NOTE: Don't try to access the stack after CR3 was reloaded */
  /* but before we switched back to the host stack! */
  /* Restore host CRx values */
  movl (HOST_CR0)(%ebx), %eax
  movl (HOST_CR2)(%ebx), %ecx
  movl (HOST_CR4)(%ebx), %edx
  movl (HOST_CR3)(%ebx), %esi
  movl %eax, %cr0
  movl %ecx, %cr2
  movl %edx, %cr4
  movl %esi, %cr3
  jmp null_jmp1 /* serialize after the mode switch */
null_jmp1:
  /* Switch to host GDT, LDT, and IDT */
  lgdt (HOST_GDT_INFO)(%ebx)
  lidt (HOST_IDT_INFO)(%ebx)
  lldt (HOST_LDT_SEL)(%ebx)
  /* Restore host stack and CS */
  lss (HOST_STACK_INFO)(%ebx), %esp
  ljmp (HOST_JMP_INFO)(%ebx)
__host_cs:
  /* Clear busy bit of the host TSS and switch to it */
  /* Note that DS is still the monitor segment with base (MON_BASE). */
  movzwl (HOST_TSS_SEL)(%ebx), %eax
  andl $0xfffffff8, %eax /* strip RPL/TI bits to get the descriptor offset */
  addl (HOST_GDT_INFO+2)(%ebx), %eax /* add GDT base (linear) */
  subl (MON_BASE)(%ebx), %eax /* DS is still monitor-based: rebase the pointer */
  andl $0xfffffdff, 4(%eax) /* clear the TSS-descriptor busy bit */
  ltr (HOST_TSS_SEL)(%ebx)
  /* We no longer need the nexus page identity mapped, so we clean */
  /* up the monitor page directory in case the host looks at it. */
  /* Note that SS is already the host segment. */
  movl (TRANSITION_PDE_P_HOST)(%ebx), %eax
  ss; movl %ebp, (%eax) /* %ebp still contains the original value */
  /* Now we can restore the rest of */
  /* the host context from the host stack. Look at __host2guest */
  /* for the format of the values stored on the host stack. */
  popl %gs
  popl %fs
  popl %ds
  popl %es
  popal
  popfl
  ret
.globl __nexus_end
__nexus_end:

View File

@ -0,0 +1,767 @@
/*
* plex86: run multiple x86 operating systems concurrently
* Copyright (C) 1999-2003 Kevin P. Lawton
*
* paging-mon.c: Virtualized (monitor) paging functionality.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "plex86.h"
#define IN_MONITOR_SPACE
#include "monitor.h"
static unsigned allocatePT(vm_t *, unsigned pdi);
static unsigned strengthenPagePermissions(vm_t *, phyPageInfo_t *usage,
unsigned new_access_perm);
/*static void sanity_check_pdir(vm_t *vm, unsigned id, Bit32u guest_laddr); */
/* +++ fix retrieve mon pages function and .base issue */
/* also open_guest_phy_page expects shifted page val */
/* +++ write_physical() has hack to ignore when perm!=RW, fix! */
/* +++ add async handling in emulation.c, like in preGuest() */
/* Cases which would generate a mon #PF */
/* ==================================== */
/* lazy map */
/* r/w to current code page */
/* guest #PF (access checks of cpl,rw) */
/* w to RO construct */
/* r/w to NA construct */
/* inhibits */
#if 0
======= Old notes =====================================================
IDT,GDT,LDT: limit = 64K; TR is a dont care
What to do with PDir, PTbl?
What to do about coherence probs with page tables and TLB?
When are A,D bits copied between monitor and host?
Need check for mapping of space used by monitor
Code cache probably should not have laddr in it
guest.PG==0, how are phy pages unmarked when constructs move?
guest.PG transition: dump everything (flush)
remapping descriptor tables after page flush
make sure to validate phy_attr everywhere before using it.
checks for the phy_attr of page that PDir goes in
page fault because of monP?E.RW==0, but guestP?E==1
/* +++ what about virtualized linear structs like GDT, IDT, ... */
#endif
#warning "Have to be careful unpinning a page which is open"
#warning " via open_guest_phy_page(). Multiple pages could be"
#warning " open in the page walk at one time until D/A bits are set."
/* Return the host physical page index backing guest physical page
 * 'ppi'.  If the page is not yet pinned by the host OS, request that
 * it be pinned first; panics if the host fails to mark it pinned.
 */
static inline Bit32u
getHostOSPinnedPage(vm_t *vm, Bit32u ppi)
{
  if ( !vm->pageInfo[ppi].attr.fields.pinned ) {
    /* Not pinned yet.  Ask the host OS to pin this user page so its
     * physical address becomes known and stable.
     */
    toHostPinUserPage(vm, ppi);
    if ( !vm->pageInfo[ppi].attr.fields.pinned )
      monpanic(vm, "getHostOSPinnedPage: page was not marked pinned.\n");
  }
  return( vm->pageInfo[ppi].hostPPI );
}
/* Hand out the next free preallocated monitor page table and record it
 * in the laddr map for guest page-directory index 'pdi'.  Returns the
 * allocated table's index; panics when the pool is exhausted.
 */
unsigned
allocatePT(vm_t *vm, unsigned pdi)
{
  unsigned slot = vm->ptbl_laddr_map_i;

  if (slot >= MON_PAGE_TABLES)
    monpanic(vm, "allocatePT: out of page tables\n");
#if ANAL_CHECKS
  /* Slot must be unassigned (-1) before we claim it. */
  if (vm->guest.addr.page_tbl_laddr_map[pdi] != -1) {
    monprint(vm, "allocatePT: check failed.\n");
    monpanic(vm, " pdi=0x%x, laddr_map=0x%x\n",
             pdi, vm->guest.addr.page_tbl_laddr_map[pdi]);
  }
#endif
  vm->guest.addr.page_tbl_laddr_map[pdi] = slot;
  vm->ptbl_laddr_map_i = slot + 1;
  return(slot);
}
/* Look up the monitor page table index previously allocated (by
 * allocatePT) for guest page-directory index 'pdi'.  'source' is a
 * numeric caller ID used only in the panic message for debugging.
 */
unsigned
getMonPTi(vm_t *vm, unsigned pdi, unsigned source)
{
  unsigned map_i;
  map_i = vm->guest.addr.page_tbl_laddr_map[pdi];
#if ANAL_CHECKS
  /* -1 marks an unallocated slot; map_i is unsigned, so this compare
   * relies on the implicit conversion of -1 to UINT_MAX. */
  if (map_i == -1) {
    monprint(vm, "getMonPTi: check failed.\n");
    monpanic(vm, " pdi=0x%x, map_i=0x%x, source=%u\n",
        pdi, map_i, source);
  }
  if (map_i >= MON_PAGE_TABLES)
    monpanic(vm, "getMonPTi: map_i OOB\n");
#endif
  return(map_i);
}
/* Invalidate the mapping of a guest page in the monitor.
* When situations change, such as a change in the permissions
* necessary to virtualize the page properly, we'll need to do
* this first, before remapping with the new permissions.
*/
/* Tighten the monitor mapping(s) of a guest physical page to the new,
 * more restrictive access permission.  Returns 0 when the existing
 * monitor PTE could be adjusted in place, 1 when a full remap of all
 * dynamic mappings would be required (multiple linear aliases).
 */
unsigned
strengthenPagePermissions(vm_t *vm, phyPageInfo_t *pusage,
    unsigned new_access_perm)
{
  pusage->attr.fields.access_perm = new_access_perm;
  if (pusage->attr.fields.lmap_count == 0) {
    /* No linear addresses are mapped to this phy page yet.
     * Nothing to do. */
    return 0;
  }
  else if (pusage->attr.fields.lmap_count == 1) {
    /* One linear address is mapped to this phy page. */
    Bit32u pdi, pti;
    pageEntry_t *monPDE, *monPTE;
    page_t *monPTbl;
    unsigned map_i;
    /* laddr_backlink stores the linear page number of the single
     * mapping; split it into directory and table indices. */
    pdi = (pusage->attr.fields.laddr_backlink >> 10);
    pti = (pusage->attr.fields.laddr_backlink & 0x3ff);
    monPDE = &vm->guest.addr.page_dir[pdi];
    if ( !monPDE->fields.P )
      monpanic(vm, "strengthenPP: monPDE.P==0\n");
    map_i = getMonPTi(vm,pdi,10);
    monPTbl = &vm->guest.addr.page_tbl[map_i];
    monPTE = &monPTbl->pte[pti];
    if ( !monPTE->fields.P ) {
      /* PTE not present: nothing to strengthen (panic disabled). */
      /*monprint(vm, "strengthenPP: bl=0x%x, AP=%u\n",
       *pusage->attr.fields.laddr_backlink, new_access_perm); */
      /*monpanic(vm, "strengthenPP: monPTE.P==0\n"); */
    }
    else if (pusage->attr.fields.access_perm==PagePermNA) {
      /* Permissions were changed to No Access */
      monPTE->raw = 0;
    }
    else if (pusage->attr.fields.access_perm==PagePermRO) {
      /* Permissions were changed to RO */
      monPTE->fields.RW = 0;
    }
    else {
      /* Strengthening to RW makes no sense; caller bug. */
      monpanic(vm, "strengthenPP: PagePermRW\n");
    }
    /* Flush the old TLB entry */
    invlpg_mon_offset(
        Guest2Monitor(vm, pusage->attr.fields.laddr_backlink<<12)
        );
    return 0;
  }
  else {
    /* Multiple linear addresses are mapped to this phy page. */
    /* Since we dont store enough backlink info to virtualize all */
    /* linear addresses which point to this phy page, we have to dump */
    /* all dynamic mappings and start over. */
    monpanic(vm, "strengthenPP: multiple lin addr\n");
    /*monPagingRemap(vm);*/
    return 1;
  }
}
/* OR the requested usage attribute bits into a guest physical page's
 * attributes and, if the new usage demands a stronger access
 * restriction than is currently mapped, re-strengthen the monitor
 * mapping.  Returns the value of strengthenPagePermissions() (nonzero
 * means the caller must redo the mapping), or 0 when nothing changed.
 */
unsigned
addPageAttributes(vm_t *vm, Bit32u ppi, Bit32u req_attr)
{
  phyPageInfo_t *pusage;
  unsigned new_access_perm;
  VM_ASSERT(vm, ppi < vm->pages.guest_n_pages);
  pusage = &vm->pageInfo[ppi];
  if (pusage->tsc < vm->vpaging_tsc) {
    /* The dynamic attributes for this page are not valid since
     * the last remap. getPageUsage() has logic to build attributes.
     */
    getPageUsage(vm, ppi);
  }
  /* Build new attributes based on old ones, and requested ones. */
  pusage->attr.raw |= req_attr;
  /* Look at strength of new access restrictions */
  if (pusage->attr.raw & PageUsageCausesNA)
    new_access_perm = PagePermNA;
  else if (pusage->attr.raw & PageUsageCausesRO)
    new_access_perm = PagePermRO;
  else
    new_access_perm = PagePermRW;
  /* Perm values are ordered so that larger == more restrictive. */
  if (new_access_perm > pusage->attr.fields.access_perm) {
    /* New usage causes a stronger access restriction. Remap them. */
    return( strengthenPagePermissions(vm, pusage, new_access_perm) );
  }
  return 0;
}
/* Return the usage/attribute record for guest physical page 'ppi',
 * refreshing its dynamic attributes when they predate the last
 * virtual-paging remap.
 */
phyPageInfo_t *
getPageUsage(vm_t *vm, Bit32u ppi)
{
  phyPageInfo_t *info;

  VM_ASSERT(vm, ppi < vm->pages.guest_n_pages);
  info = &vm->pageInfo[ppi];
  if (info->tsc < vm->vpaging_tsc) {
    /* Record is stale: timestamp it, keep only the sticky attribute
     * bits, and rederive the access permission from what remains
     * (strongest restriction wins).
     */
    info->tsc = vm_rdtsc();
    info->attr.raw &= PageUsageSticky;
    if (info->attr.raw & PageUsageCausesNA)
      info->attr.fields.access_perm = PagePermNA;
    else if (info->attr.raw & PageUsageCausesRO)
      info->attr.fields.access_perm = PagePermRO;
    else
      info->attr.fields.access_perm = PagePermRW;
  }
  return(info);
}
/* Map guest physical page 'ppi' into the monitor at the temporary
 * window 'mon_offset' (one of the tmp_phy_page* slots) and return that
 * address.  The page is pinned in the host OS first so its physical
 * address is known.
 */
void *
open_guest_phy_page(vm_t *vm, Bit32u ppi, Bit8u *mon_offset)
{
  page_t *pageTable;
  Bit32u pti, mon_range_offset;
  VM_ASSERT(vm, ppi < vm->pages.guest_n_pages);
  /* Since we rewind our CS/DS.base so that the beginning of our */
  /* monitor pages land on the beginning of a new 4Meg boundary */
  /* (separate PDE), find out what mon_offset is in terms of */
  /* an offset from the beginning of the PDE boundary. */
  mon_range_offset = ( ((Bit32u) mon_offset) -
      kernelModulePages.startOffsetPageAligned );
  pti = (mon_range_offset >> 12) & 0x3ff;
  pageTable = vm->guest.addr.nexus_page_tbl;
  /* Remap the base field. All the rest of the fields are */
  /* set previously, and can remain the same. */
  pageTable->pte[pti].fields.base = getHostOSPinnedPage(vm, ppi);
  /* Flush the stale translation for the window we just retargeted. */
  invlpg_mon_offset( (Bit32u) mon_offset );
  return(mon_offset);
}
/* Counterpart of open_guest_phy_page().  Currently an unimplemented
 * stub; presumably the temporary window is simply overwritten by the
 * next open -- TODO confirm when this is filled in. */
void
close_guest_phy_page(vm_t *vm, Bit32u ppi)
{
  /* ppi is >> 12 already */
  /* +++ */
}
/* Mark the pages of a protected construct spanning linear addresses
 * [l0, l1] as virtualized, with permission 'perm'.  NOTE: deliberately
 * unfinished -- the function panics up front and everything after the
 * 'return' below is dead code kept as a sketch of the intended logic.
 */
void
virtualize_lconstruct(vm_t *vm, Bit32u l0, Bit32u l1, unsigned perm)
{
  /* Mark pages for a protected construct in linear space as */
  /* virtualized (protected), if it is mapped into monitor space. */
  /* Pages which are not yet mapped in, are virtualized dynamically */
  /* when they are mapped in. */
  Bit32u pdi, pdi0, pdi1, pti, pti0, pti1;
  pageEntry_t *monPDE, *monPTE;
  page_t *monPTbl;
  /* +++ For now, can just dump all page mappings and start over */
  /* again. Need to complete this function, so we can virtualize */
  /* only those pages which need it, and keep the other ones. */
  /* +++ Need to look at perm also. */
  monpanic(vm, "vir_lconstruct: unfinished.\n");
  /*monPagingRemap(vm);*/
  return;
  /* --- unreachable sketch below --- */
  if (vm->guest.addr.guest_cpu->cr0.fields.pg)
    monpanic(vm, "virtualize_lconstruct: guest PG==1\n");
  if (l0 >= l1)
    monpanic(vm, "virtualize_lconstruct: l0>=l1!\n");
  if ( (l1-l0) > (64*1024) )
    monpanic(vm, "virtualize_lconstruct: span is > 64k!\n");
  pdi0 = l0 >> 22;
  pdi1 = l1 >> 22;
  pti0 = (l0 >> 12) & 0x000003ff;
  for (pdi=pdi0; pdi<=pdi1; pdi++) {
    if ( pdi == vm->mon_pdi )
      monpanic(vm, "virtualize_lconstruct: conflict with monitor space\n");
    monPDE = &vm->guest.addr.page_dir[pdi];
    if (monPDE->fields.P) {
      if (pdi<pdi1)
        pti1 = 0x3ff; /* spans multiple pdi's, use last index of range */
      else
        pti1 = (l1 >> 12) & 0x000003ff; /* use index of last address */
      for (pti=pti0; pti<=pti1; pti++) {
        /* +++ */
        /* +++ FIX THIS!!!, set depending on guest.CR0.PG */
        /* +++ */
        monPTbl = &vm->guest.addr.page_tbl[pdi];
        monPTE = &monPTbl->pte[pti];
        if (monPTE->fields.P) {
          /* +++ finish this! */
          /* The physical page for this linear address is allocated */
          /* and mapped into the monitor. We can access the attributes */
          /* for this physical page. Even if it has been virtualized */
          /* before, we still need to mark it since it could have been */
          /* virtualized due to a physical page constraint. */
          monpanic(vm, "virtualize_lconstruct: finish.\n");
        }
      }
    }
    pti0 = 0; /* start address at boundary of next PDI */
  }
}
/* Map guest linear address 'guest_laddr' into the monitor's linear
 * address space, honoring the guest's own paging semantics.
 *
 * When guest paging is on, the guest page-walk (PDE then PTE) is
 * performed first; any guest-visible protection violation or
 * not-present entry yields MapLinException with a page-fault error
 * code in *error.  Otherwise the monitor PDE/PTE for the address are
 * lazily allocated/filled, with permissions clamped by the physical
 * page's virtualization attributes (PagePermNA/RO force emulation or
 * read-only mappings).
 *
 *  req_us/req_rw: requested user(1)/supervisor(0) and write(1) access.
 *  attr:          requested usage attribute bits (currently passed 0).
 *  guest_ppi:     out - guest physical page index of the mapping.
 *  error:         out - PF error code, valid only for MapLinException.
 *
 * Returns a MapLin* status code.
 */
unsigned
mapGuestLinAddr(vm_t *vm, Bit32u guest_laddr, Bit32u *guest_ppi,
    unsigned req_us, unsigned req_rw, Bit32u attr,
    Bit32u *error)
{
  Bit32u pdi, pti;
  Bit32u guest_lpage_index, ptbl_ppi;
  page_t *monPTbl;
  pageEntry_t *monPDE, *monPTE;
  pageEntry_t *guestPDir, guestPDE, *guestPTbl, guestPTE;
  Bit32u guest_pdir_page_index;
  unsigned pt_index, us, rw;
  phyPageInfo_t *pusage;
  unsigned wasRemap = 0;
  guest_lpage_index = guest_laddr >> 12;
  pdi = guest_lpage_index >> 10;
  pti = guest_lpage_index & 0x3ff;
  monPDE = &vm->guest.addr.page_dir[pdi];
  if (vm->guest.addr.guest_cpu->cr0.fields.pg) {
    /* Check out the guest's mapping of this address to see */
    /* if would allow for an access. */
    /* First, get the guest PDE */
    guest_pdir_page_index = A20Addr(vm, vm->guest.addr.guest_cpu->cr3) >> 12;
    if (guest_pdir_page_index >= vm->pages.guest_n_pages)
      monpanic(vm, "mapGuestLinAddr: PG=1 guest PDE OOB\n");
    /* Open a window into guest physical memory */
    guestPDir = open_guest_phy_page(vm, guest_pdir_page_index,
        vm->guest.addr.tmp_phy_page0);
    guestPDE = guestPDir[pdi];
    /* See if present, before fetching PTE */
    if (guestPDE.fields.P==0) {
      *error = 0x00000000; /* RSVD=0, P=0 */
      goto np_exception;
    }
#if 0
    if (vm->guestCpuIDInfo.procSignature.fields.family < 6) {
      /* Update A bit of PDE memory image if not already */
      if ( guestPDE.fields.A == 0 ) {
        guestPDE.fields.A = 1;
        guestPDir[pdi] = guestPDE;
      }
    }
#endif
    /* Second, get the guest PTE */
    ptbl_ppi = A20PageIndex(vm, guestPDE.fields.base);
    if (ptbl_ppi >= vm->pages.guest_n_pages)
      monpanic(vm, "mapGuestLinAddr: PG=1 guest PTE OOB\n");
    guestPTbl = open_guest_phy_page(vm, ptbl_ppi,
        vm->guest.addr.tmp_phy_page1);
    guestPTE = guestPTbl[pti];
    if (guestPTE.fields.P==0) {
      *error = 0x00000000; /* RSVD=0, P=0 */
      goto np_exception;
    }
#if 0
    /* +++ */
    if (guestPDE.raw & PDEUnhandled)
      monpanic(vm, "mapGuestLinAddr: guestPDE 0x%08x\n", guestPDE.raw);
#endif
    /* See if requested guest priv is weaker than guest PDE priv */
    if (req_us > guestPDE.fields.US) {
      *error = 0x00000001; /* RSVD=0, P=1 */
      goto access_exception;
    }
    /* Writes by supervisor bypass RW unless CR0.WP is set. */
    if ( (req_rw > guestPDE.fields.RW) &&
         (vm->guest.addr.guest_cpu->cr0.fields.wp || req_us) ) {
      *error = 0x00000001; /* RSVD=0, P=1 */
      goto access_exception;
    }
#warning "ignoring PTEUnhandled bits"
#if 0
    if (guestPTE.raw & PTEUnhandled)
      monpanic(vm, "mapGuestLinAddr: guestPTE 0x%08x\n", guestPTE.raw);
#endif
    if (req_us > guestPTE.fields.US) {
      *error = 0x00000001; /* RSVD=0, P=1 */
      goto access_exception;
    }
    if ( (req_rw > guestPTE.fields.RW) &&
         (vm->guest.addr.guest_cpu->cr0.fields.wp || req_us) ) {
      *error = 0x00000001; /* RSVD=0, P=1 */
      goto access_exception;
    }
#if 0
    if (vm->guestCpuIDInfo.procSignature.fields.family >= 6) {
      /* Update A bit of PDE memory image if not already */
      if ( guestPDE.fields.A == 0 ) {
        guestPDE.fields.A = 1;
        guestPDir[pdi] = guestPDE;
      }
    }
    /* Update A bit in PTE memory image if not already */
    if ( (guestPTE.fields.A == 0) ||
         ((req_rw==1) && !guestPTE.fields.D) ) {
      guestPTE.fields.A = 1;
      if (req_rw==1)
        guestPTE.fields.D = 1;
      guestPTbl[pti] = guestPTE;
    }
#endif
    *guest_ppi = A20PageIndex(vm, guestPTE.fields.base);
  }
  else {
    /* guest paging is off, linear address is physical address */
    guest_pdir_page_index = 0; /* keep compiler quiet */
    *guest_ppi = A20PageIndex(vm, guest_lpage_index);
  }
  if (*guest_ppi >= vm->pages.guest_n_pages)
    return(MapLinPPageOOB);
  /* +++ mapping in guest pages, check static phy_attr bits first before */
  /* +++ allowing non-protected. */
mapIntoMonitor:
  /* At this point, we know that the guest's paging system
   * (if enabled) would allow for this access. Now we have to
   * see about mapping it into the monitor linear address space.
   */
  pusage = getPageUsage(vm, *guest_ppi);
  /* We may retry once after addPageAttributes() forces a re-map;
   * more than one retry means something is broken. */
  if (wasRemap > 1)
    monpanic(vm, "wasRemap>1\n");
  /*
   * Check monitor PDE
   */
  if (monPDE->fields.P == 0) {
    /* OK, Lazy PT map/allocate */
    if (vm->guest.addr.guest_cpu->cr0.fields.pg) {
      phyPageInfo_t *pde_pusage;
      pde_pusage =
        getPageUsage(vm, A20PageIndex(vm, guestPDE.fields.base));
      if (pde_pusage->attr.raw & PageBadUsage4PTbl) {
#warning "PDE->PDir hack"
        /*monprint(vm, "PDE.base=0x%x CR3=0x%x\n",
         * A20PageIndex(vm, guestPDE.fields.base),
         * A20Addr(vm, vm->guest_cpu.cr3));
         */
        return(MapLinEmulate);
      }
      if (pde_pusage->attr.raw & PageUsagePTbl) {
        /* It is possible that multiple PDE entries will point to */
        /* the same Page Table. In this case, we need to search to */
        /* find which one the monitor already mapped in, and get */
        /* a pointer to the Page Table allocated by the monitor. */
        Bit32u guest_ptbl_index;
        unsigned i;
        guestPDir = open_guest_phy_page(vm, guest_pdir_page_index,
            vm->guest.addr.tmp_phy_page0);
        guest_ptbl_index = A20PageIndex(vm, guestPDir[pdi].fields.base);
        monPTbl = (void *) 0;
        pt_index = 0; /* keep compiler quiet */
        for (i=0; i<1024; i++) {
          if (i==pdi) continue; /* skip current PDI */
          guestPDE = guestPDir[i];
          if ( guestPDE.fields.P &&
               (A20PageIndex(vm, guestPDE.fields.base)==guest_ptbl_index) ) {
            /* OK, guest has a PDE which matches. If it is mapped into */
            /* the monitor already, then we are done searching. */
            if (vm->guest.addr.page_dir[i].fields.P) {
              pt_index = getMonPTi(vm, i, 11);
              vm->guest.addr.page_tbl_laddr_map[pdi] = pt_index;
              monPTbl = &vm->guest.addr.page_tbl[pt_index];
              break;
            }
          }
        }
        close_guest_phy_page(vm, guest_pdir_page_index);
        /* i>=1024 means the search found no monitor-mapped alias. */
        if (i>=1024)
          monpanic(vm, "mapGuestLinAddr: PDE maps to existing PTbl.\n");
      }
      else {
        /* Allocate PT using paged scheme. */
        pt_index = allocatePT(vm, pdi);
        monPTbl = &vm->guest.addr.page_tbl[pt_index];
        mon_memzero(monPTbl, sizeof(*monPTbl));
      }
      if (vm->guest.addr.guest_cpu->sreg[SRegCS].des.dpl==3) {
        /* For user code, we can use the guest US & RW values as-is, */
        /* since they are honored as such with either CR0.WP value. */
        us = guestPDE.fields.US;
        rw = guestPDE.fields.RW;
      }
      else { /* guest supervisor code */
        /* For supervisor code, access rules are different dependent on */
        /* the value of CR0.WP. */
        if (vm->guest.addr.guest_cpu->cr0.fields.wp==0) {
          /* If CR0.WP=0, then supervisor code can write to any page, */
          /* and permissions are effectively ignored. */
          us = 1;
          rw = 1;
        }
        else { /* CR0.WP==1 */
          /* If CR0.WP=0, then supervisor code can read from any page, */
          /* but write permission depends on the RW bit. */
          us = 1;
          rw = guestPDE.fields.RW;
        }
      }
      /* Base/Avail=0/G=0/PS=0/D=d/A=a/PCD=0/PWT=0/US=us/RW=rw/P=1 */
      monPDE->raw =
        (vm->pages.page_tbl[pt_index] << 12) | (guestPDE.raw & 0x60) |
        (us<<2) | (rw<<1) | 1;
      if ( addPageAttributes(vm, A20PageIndex(vm, guestPDE.fields.base),
                             PageUsagePTbl) ) {
        /* Attribute change forced a remap; redo the monitor mapping. */
        wasRemap++;
        goto mapIntoMonitor;
      }
    }
    else {
      /* Allocate PT using non-paged scheme. */
      pt_index = allocatePT(vm, pdi);
      monPTbl = &vm->guest.addr.page_tbl[pt_index];
      mon_memzero(monPTbl, 4096);
      /* Base/Avail=0/G=0/PS=0/D=0/A=0/PCD=0/PWT=0/US=1/RW=1/P=1 */
      monPDE->raw =
        (vm->pages.page_tbl[pt_index] << 12) | 0x7;
    }
  }
  else {
    /* monPDE->P == 1 */
    /* Make sure this laddr does not conflict with monitor space */
    /* This can only happen when monPDE.P==1, since the monitor */
    /* is always mapped in. */
    if ( (guest_laddr & 0xffc00000) == vm->mon_pde_mask )
      return(MapLinMonConflict);
    pt_index = getMonPTi(vm, pdi, 12);
    monPTbl = &vm->guest.addr.page_tbl[pt_index];
  }
  monPTE = &monPTbl->pte[pti];
  /*
   * Check monitor PTE
   */
  if (monPTE->fields.P == 0) {
    if (vm->guest.addr.guest_cpu->cr0.fields.pg) {
      if (vm->guest.addr.guest_cpu->sreg[SRegCS].des.dpl==3) {
        /* For user code, we can use the guest US & RW values as-is, */
        /* since they are honored as such with either CR0.WP value. */
        us = guestPTE.fields.US;
        rw = guestPTE.fields.RW;
      }
      else { /* guest supervisor code */
        /* For supervisor code, access rules are different dependent on */
        /* the value of CR0.WP. */
        if (vm->guest.addr.guest_cpu->cr0.fields.wp==0) {
          /* If CR0.WP=0, then supervisor code can write to any page, */
          /* and permissions are effectively ignored. */
          us = 1;
          rw = 1;
        }
        else { /* CR0.WP==1 */
          /* If CR0.WP=0, then supervisor code can read from any page, */
          /* but write permission depends on the RW bit. */
          us = 1;
          rw = guestPTE.fields.RW;
        }
      }
      /* Clamp to the page's virtualization permission. */
      if (pusage->attr.fields.access_perm==PagePermRO) {
        rw = 0;
        if (req_rw)
          return(MapLinEmulate);
      }
      else if (pusage->attr.fields.access_perm==PagePermNA)
        return(MapLinEmulate);
      /* Base/Avail=0/G=0/PS=0/D=d/A=a/PCD=0/PWT=0/US=1/RW=rw/P=1 */
      monPTE->raw =
        (getHostOSPinnedPage(vm, *guest_ppi) << 12) | (guestPTE.raw & 0x60) |
        0x5 | (rw<<1);
    }
    else { /* CR0.PG==0 */
      rw = 1; /* Paging off is effectively RW */
      if (pusage->attr.fields.access_perm==PagePermRO) {
        rw = 0;
        if (req_rw)
          return(MapLinEmulate);
      }
      else if (pusage->attr.fields.access_perm==PagePermNA)
        return(MapLinEmulate);
      /* Base/Avail=0/G=0/PS=0/D=0/A=0/PCD=0/PWT=0/US=1/RW=rw/P=1 */
      monPTE->raw =
        (getHostOSPinnedPage(vm, *guest_ppi) << 12) | 0x5 | (rw<<1);
    }
    /* Mark physical page as having an unvirtualized linear address
     * mapped to it.
     */
    if (pusage->attr.fields.lmap_count == 0) {
      pusage->attr.fields.lmap_count = 1;
      pusage->attr.fields.laddr_backlink = guest_lpage_index;
    }
    else if (pusage->attr.fields.lmap_count == 1) {
      pusage->attr.fields.lmap_count = 2; /* max out count */
      /* Count maxed out, we only store laddr_backlink of 1st mapping. */
    }
    else {
      /* Count maxed out, we don't store any more info. */
    }
    invlpg_mon_offset( Guest2Monitor(vm, guest_laddr) );
    return(MapLinOK);
  }
  else {
    /* PTE.P == 1 */
    return(MapLinAlreadyMapped);
  }
np_exception:
access_exception:
  /* Fold the access type (US/RW) into the page-fault error code. */
  *error |= (req_us<<2) | (req_rw<<1);
  return(MapLinException);
}
/* Handle a page fault taken while the guest was executing: attempt to
 * lazily map the faulting linear address (cr2) via mapGuestLinAddr(),
 * and either resume the guest (MapLinOK), reflect the fault to the
 * guest (MapLinException), or panic on states this code cannot yet
 * handle.
 */
void
guestPageFault(vm_t *vm, guest_context_t *context, Bit32u cr2)
{
  Bit32u guest_ppi, error, gerror;
  unsigned us, rw;
  /* Make sure this laddr does not conflict with monitor space */
  if ( (cr2 & 0xffc00000) == vm->mon_pde_mask )
    monpanic(vm, "PageFault: guest access to monitor space\n");
  error = context->error;
  if (error & 0x8) /* If RSVD bits used in PDir */
    monpanic(vm, "guestPageFault: RSVD\n");
  /* Derive the access type from guest CPL and the PF error code. */
  us = vm->guest.addr.guest_cpu->sreg[SRegCS].des.dpl == 3;
  rw = (error >> 1) & 1;
  /* +++ should base attr (currently 0) on whether this is */
  /* code or data??? only if siv==1 */
  switch (mapGuestLinAddr(vm, cr2, &guest_ppi, us, rw, 0, &gerror)) {
    case MapLinOK:
      return;
    case MapLinMonConflict:
      /* monpanic() never returns; the fall-throughs below are benign. */
      monpanic(vm, "guestPageFault: MapLinMonConflict:\n");
    case MapLinAlreadyMapped:
      monpanic(vm, "guestPageFault: MapLinAlreadyMapped:\n");
      /*emulate_instr(vm, context, 2);*/
      return;
    case MapLinPPageOOB:
      monpanic(vm, "guestPageFault: MapLinPPageOOB (0x%x):\n", cr2);
    case MapLinEmulate:
      monpanic(vm, "guestPageFault: MapLinEmulate:\n");
      /*emulate_instr(vm, context, 3);*/
      return;
    case MapLinException:
      /* Reflect the fault to the guest via the host. */
      /*monpanic(vm, "guestPageFault: emulate_exception was here.\n");*/
      /*emulate_exception(vm, ExceptionPF, gerror);*/
      toHostGuestFault(vm, ExceptionPF);
      return;
    default:
      monpanic(vm, "guestPageFault: MapLin: default case:\n");
  }
}
#if 0
void
sanity_check_pdir(vm_t *vm, unsigned id, Bit32u guest_laddr)
{
pageEntry_t *monPDE;
Bit32u pdi;
unsigned pt_index;
for (pdi=0; pdi<1024; pdi++) {
monPDE = &vm->guest.addr.page_dir[pdi];
if ( (pdi!=vm->mon_pdi) &&
monPDE->fields.P ) {
pt_index = vm->guest.addr.page_tbl_laddr_map[pdi];
if (pt_index == -1)
monpanic(vm, "sanity_check_pdir: pt_index==-1\n");
if (pt_index >= vm->pages.guest_n_pages)
monpanic(vm, "sanity_check_pdir: pt_index OOB\n");
if ( monPDE->fields.base != vm->pages.page_tbl[pt_index] ) {
monprint(vm, "gaddr=0x%x\n", guest_laddr);
monprint(vm, "pt_index=%u\n", pt_index);
monprint(vm, "map[0x302]=%u\n",
vm->guest.addr.page_tbl_laddr_map[0x302]);
monpanic(vm, "sanity_check_pdir: id=%u "
"pdi=0x%x\n", id, pdi);
}
}
}
}
#endif

View File

@ -0,0 +1,71 @@
/*
* plex86: run multiple x86 operating systems concurrently
* Copyright (C) 1999-2003 Kevin P. Lawton
*
* panic-mon.c: Monitor panic facility.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "plex86.h"
#define IN_MONITOR_SPACE
#include "monitor.h"
extern int mon_vprint(vm_t *vm, char *fmt, va_list args);
/*======================================== */
/* Only to be used in monitor/guest space! */
/*======================================== */
/* Print a formatted panic message into the monitor log and hand
 * control back to the host with a MonReqPanic request.  Never returns:
 * should the host transition ever come back (broken logic), the
 * request is simply reissued rather than hanging the machine.
 */
void
monpanic(vm_t *vm, char *fmt, ...)
{
  monprint(vm, "plex86 panic: ");
  if (fmt) {
    va_list args;
    va_start(args, fmt);
    mon_vprint(vm, fmt, args);
    va_end(args);
  }
  for (;;) {
    CLI();
    vm->mon_request = MonReqPanic;
    vm->guest.__mon2host();
  }
}
/* Message-less variant of monpanic(): request a panic transition back
 * to the host, retrying forever in case the transition ever returns.
 */
void
monpanic_nomess(vm_t *vm)
{
  for (;;) {
    CLI();
    vm->mon_request = MonReqPanic;
    vm->guest.__mon2host();
  }
}

View File

@ -0,0 +1,262 @@
/*
* plex86: run multiple x86 operating systems concurrently
* Copyright (C) 1999-2003 Kevin P. Lawton
*
* print-nexus.c: Monitor debug print facility
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "plex86.h"
#define IN_MONITOR_SPACE
#include "monitor.h"
int mon_vprint(vm_t *vm, char *fmt, va_list args);
/* Table of 10^0 .. 10^9, indexed by decimal digit position.  Used by
 * mon_vsnprintf()'s %u conversion to extract digits most-significant
 * first without dividing by variable powers of ten.
 */
static unsigned int power_of_ten[] = {
  1,
  10,
  100,
  1000,
  10000,
  100000,
  1000000,
  10000000,
  100000000,
  1000000000,
};
/*
 * Variadic front end for mon_vprint(): packages the variable argument
 * list and forwards it.  Returns whatever mon_vprint() returns (the
 * number of characters appended to the log buffer, or 0 on failure).
 */
int
monprint(vm_t *vm, char *fmt, ...)
{
  int nprinted;
  va_list ap;

  va_start(ap, fmt);
  nprinted = mon_vprint(vm, fmt, ap);
  va_end(ap);
  return nprinted;
}
/*
 * Core formatted-print routine: appends the formatted message to the
 * per-VM log buffer (vm->guest.addr.log_buffer) and advances the
 * stored offset.  Returns the number of characters appended, or 0 if
 * the buffer was busy or the message could not be stored at all.
 *
 * NOTE(review): 'locked' is a plain busy flag, not a real lock; a
 * message arriving while another print is in progress is dropped.
 */
int
mon_vprint(vm_t *vm, char *fmt, va_list args)
{
  unsigned offset, size;
  unsigned char *log_buff_p;
  va_list args2;
  int ret;

  /* Drop the message if a print is already in progress. */
  if (vm->log_buffer_info.locked)
    return 0;
  vm->log_buffer_info.locked = 1;
  vm->log_buffer_info.event = 1;  /* Flag that new log data exists. */
  offset = vm->log_buffer_info.offset;

  /* Sanity check */
  if (offset >= LOG_BUFF_SIZE) {
    vm->guest.addr.log_buffer[0] = 0; /* Null terminate. */
    resetPrintBuf(vm);  /* Also clears the 'locked' flag set above. */
    return(0);
  }

  size = LOG_BUFF_SIZE - offset;
  log_buff_p = &vm->guest.addr.log_buffer[offset];
  /* BUG FIX: a va_list may only be traversed once.  The retry path
   * below used to reuse 'args' after this first call had already
   * consumed it -- undefined behavior on ABIs where va_list is not a
   * plain pointer.  Traverse va_copy'd lists instead.
   */
  va_copy(args2, args);
  ret = mon_vsnprintf(log_buff_p, size, fmt, args2);
  va_end(args2);
  if (ret == -1) {
    /* Terminate current contents since new print request did not work. */
    *log_buff_p = 0;
    /* We are in monitor space, so we can request that the current
     * buffer contents be printed.
     */
    resetPrintBuf(vm);
    sysFlushPrintBuf(vm);
    /* Print request did not fit.  Buffer contents were dumped; try
     * again using the whole buffer.
     */
    size = LOG_BUFF_SIZE;
    log_buff_p = &vm->guest.addr.log_buffer[0];
    va_copy(args2, args);
    ret = mon_vsnprintf(log_buff_p, size, fmt, args2);
    va_end(args2);
    if (ret == -1) {
      /* We have serious problems.  This print request will not even
       * fit in the whole buffer.
       */
      vm->guest.addr.log_buffer[0] = 0; /* Null terminate. */
      resetPrintBuf(vm);
      /* xxx Put error in buffer here. */
      return(0);
    }
  }

  vm->log_buffer_info.offset += ret;
  vm->log_buffer_info.locked = 0;  /* Release the busy flag. */
#if 0 /* Fri Dec 27 21:43:05 EST 2002 */
  resetPrintBuf(vm);
  sysFlushPrintBuf(vm);
#endif
  return(ret);
}
/*
 * Return the log-buffer bookkeeping to its pristine state: empty, no
 * recorded error, no pending event, and (last) unlocked so a pending
 * printer can proceed.
 */
void
resetPrintBuf(vm_t *vm)
{
  vm->log_buffer_info.offset = 0;
  vm->log_buffer_info.error  = 0;
  vm->log_buffer_info.event  = 0;
  vm->log_buffer_info.locked = 0;
}
/*
 * Minimal vsnprintf()-style formatter for the monitor's logging code.
 * Only a small set of conversions is understood:
 *   %x  hexadecimal (32-bit)      %u  unsigned decimal
 *   %c  single character          %s  null-terminated string
 *   %b  binary (non-standard but useful)
 * Each conversion accepts an optional decimal field width that caps
 * how many leading positions are considered; leading zero digits or
 * bits within that cap are suppressed.
 *
 * Returns the number of characters stored in 'str' (excluding the
 * terminating null), or -1 if the output would not fit in 'size'
 * bytes or an unrecognized conversion is encountered.
 */
int
mon_vsnprintf(char *str, unsigned size, const char *fmt, va_list args)
{
  int count = 0;
  unsigned format_width;
  unsigned char c;

  while (*fmt) {
    switch (*fmt) {
      case '%':
        format_width = 0;
        fmt++;
        c = *fmt++;
        /* Get optional field width */
        if ( (c>='0') && (c<='9') ) {
          do {
            format_width = (format_width * 10) + (c - '0');
            c = *fmt++;
          } while ( (c>='0') && (c<='9') );
        }

        /* %x: hexadecimal */
        if ( c == 'x' ) {
          unsigned int val, leadin;
          int j;
          unsigned nibble;

          val = va_arg(args, unsigned int);
          leadin = 1;
          for (j=7; j>=0; j--) {
            nibble = (val >> (4 * j)) & 0x0f;
            /* Suppress leading zero nibbles (bounded by the field
             * width); always emit the final nibble so 0 prints "0".
             */
            if (leadin && j && !format_width && !nibble)
              continue;
            if (leadin && j && format_width && ((j+1)>format_width) &&
                !nibble)
              continue;
            leadin = 0;
            if ( (count+2) >= size ) goto error;
            if (nibble <= 9)
              *str++ = nibble + '0';
            else
              *str++ = (nibble-10) + 'A';
            count++;
          }
          break;
        }

        /* %c: character */
        if ( c == 'c' ) {
          unsigned char val;

          val = va_arg(args, unsigned);
          if ( (count+2) >= size ) goto error;
          *str++ = val;
          count++;
          break;
        }

        /* %s: string */
        if ( c == 's' ) {
          unsigned char *s;

          s = va_arg(args, unsigned char *);
          if ( (count+2) >= size ) goto error;
          /* BUG FIX: an unconditional 'count++' used to sit here with
           * no corresponding byte written, inflating the return value
           * by one per %s.  The caller advances its log-buffer offset
           * by the return value, so each %s left a stale gap byte in
           * the log buffer.
           */
          while (*s) {
            if ( (count+2) >= size ) goto error;
            *str++ = *s++; /* Copy char from string to output buffer. */
            count++;
          }
          break;
        }

        /* %u: unsigned int */
        if ( c == 'u' ) {
          unsigned int val, leadin;
          int j;
          unsigned digit;

          val = va_arg(args, unsigned int);
          leadin = 1;
          for (j=9; j>=0; j--) {
            /* Skip leading positions where the value has no digit
             * (bounded by the field width); always emit 10^0.
             */
            if (leadin && j && !format_width && (val < power_of_ten[j]))
              continue;
            if (leadin && j && format_width && ((j+1)>format_width) &&
                (val < power_of_ten[j]))
              continue;
            leadin = 0;
            digit = (val / power_of_ten[j]);
            if ( (count+2) >= size ) goto error;
            *str++ = digit + '0';
            count++;
            val -= (digit * power_of_ten[j]);
          }
          break;
        }

        /* %b : binary (non-standard but useful) */
        if ( c == 'b' ) {
          unsigned int val, bit, leadin;
          int j;

          val = va_arg(args, unsigned int);
          leadin = 1;
          for (j=31; j>=0; j--) {
            bit = (val >> j) & 1;
            if (leadin && j && !format_width && !bit)
              continue;
            if (leadin && j && format_width && ((j+1)>format_width) && !bit)
              continue;
            leadin = 0;
            if ( (count+2) >= size ) goto error;
            *str++ = bit + '0';
            count++;
          }
          break;
        }

        /* Error, unrecognized format char */
        goto error;

      default:
        /* pass char through */
        if ( (count+2) >= size ) goto error;
        *str++ = *fmt++;
        count++;
        break;
    }
  }

  *str = 0; /* Complete string with null char */
  return(count);

error:
  return(-1);
}

View File

@ -0,0 +1,57 @@
/*
* plex86: run multiple x86 operating systems concurrently
* Copyright (C) 1999-2003 Kevin P. Lawton
*
* util-nexus.c: convenience routines which can be accessed from
* either space.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "plex86.h"
/* These functions are available from either space. */
#define IN_MONITOR_SPACE
#define IN_HOST_SPACE
#include "monitor.h"
/*
 * Zero 'size' bytes starting at 'ptr'.  Freestanding replacement for
 * memset(ptr, 0, size); assumes size >= 0.
 */
void
mon_memzero(void *ptr, int size)
{
  unsigned char *dst = ptr;
  int i;

  for (i = 0; i < size; i++)
    dst[i] = 0;
}
/*
 * Copy 'size' bytes from 'src' to 'dst'.  Freestanding replacement
 * for memcpy(); regions must not overlap; assumes size >= 0.
 */
void
mon_memcpy(void *dst, void *src, int size)
{
  unsigned char *to = dst;
  unsigned char *from = src;
  int i;

  for (i = 0; i < size; i++)
    to[i] = from[i];
}
/*
 * Fill 'n' bytes at 'dst' with the byte value of 'c' (truncated to
 * unsigned char, as with standard memset).  Returns 'dst'.
 */
void *
mon_memset(void *dst, unsigned c, unsigned n)
{
  unsigned char *p = dst;
  unsigned char byte = (unsigned char) c;

  for (; n != 0; n--)
    *p++ = byte;
  return(dst);
}