336 lines
6.9 KiB
PHP
336 lines
6.9 KiB
PHP
;; -----------------------------------------------------------------------
|
|
;;
|
|
;; Copyright 1994-2009 H. Peter Anvin - All Rights Reserved
|
|
;; Copyright 2009-2010 Intel Corporation; author: H. Peter Anvin
|
|
;;
|
|
;; This program is free software; you can redistribute it and/or modify
|
|
;; it under the terms of the GNU General Public License as published by
|
|
;; the Free Software Foundation, Inc., 53 Temple Place Ste 330,
|
|
;; Boston MA 02111-1307, USA; either version 2 of the License, or
|
|
;; (at your option) any later version; incorporated herein by reference.
|
|
;;
|
|
;; -----------------------------------------------------------------------
|
|
|
|
;;
|
|
;; bcopy32xx.inc
|
|
;;
|
|
|
|
|
|
;
|
|
; 32-bit bcopy routine
|
|
;
|
|
; This is the actual 32-bit portion of the bcopy and shuffle and boot
|
|
; routines. ALL THIS CODE NEEDS TO BE POSITION-INDEPENDENT, with the
|
|
; sole exception being the actual relocation code at the beginning of
|
|
; pm_shuffle_boot.
|
|
;
|
|
; It also really needs to live all in a single segment, for the
|
|
; address calculations to actually work.
|
|
;
|
|
|
|
; Everything below is 32-bit code living in the relocatable bcopy section
		bits 32
		section .bcopyxx.text
		align 16
;
; pm_bcopy:
;
;	This is the protected-mode core of the "bcopy" routine.
;	Try to do aligned transfers; if the src and dst are relatively
;	misaligned, align the dst.
;
;	Inputs:
;	  ESI = source address, or -1 to zero-fill the destination instead
;	  EDI = destination address
;	  ECX = byte count; guaranteed to not be zero on entry
;
;	Overlapping regions are handled: when the source is below the
;	destination and the two ranges overlap, the copy runs backwards.
;
;	The forward and zero-fill paths never touch the direction flag
;	themselves, so they rely on DF being clear on entry.  The reverse
;	path sets DF and clears it again before returning.
;
;	Clobbers ESI, EDI, ECX and the arithmetic flags.
;	EAX and EBX are saved and restored; EDX is never written.
;
;	The alignment decisions always test the *current* value of EDI.
;	Testing a copy of the address taken before the one-byte step would
;	look at a stale bit 1 and leave the bulk dword transfer misaligned
;	whenever that one-byte step was taken.
;

pm_bcopy:
		push ebx		; BL carries the tail bits when zeroing
		push eax		; AL carries the tail bits when copying

		cmp esi,-1
		je near .bzero		; Too far away for a short jump

		cmp esi,edi		; If source < destination, we might
		jb .reverse		; have to copy backwards

.forward:
		; Initial alignment: bring EDI up to a dword boundary
		test edi,1
		jz .faa1
		movsb
		dec ecx
.faa1:
		mov al,cl		; Tail bits, in case we go to .f_tiny
		cmp ecx,2
		jb .f_tiny		; Only 0 or 1 bytes left

		test edi,2		; EDI is even at this point
		jz .faa2
		movsw
		sub ecx,2
.faa2:

		; Bulk transfer
		mov al,cl		; Save low bits
		shr ecx,2		; Convert to dwords
		rep movsd		; Do our business
		; At this point ecx == 0

		test al,2		; Trailing word?
		jz .fab2
		movsw
.fab2:
.f_tiny:
		test al,1		; Trailing byte?
		jz .fab1
		movsb
.fab1:
.done:
		pop eax
		pop ebx
		ret

.reverse:
		lea eax,[esi+ecx-1]	; Point to final byte
		cmp edi,eax
		ja .forward		; No overlap, do forward copy

		std			; Reverse copy
		lea edi,[edi+ecx-1]	; Last byte of the destination
		mov esi,eax		; Last byte of the source

		; Initial alignment: make the end of the remaining
		; destination range fall on a dword boundary
		test edi,1
		jnz .raa1		; Last byte is odd: range ends on an even address
		movsb
		dec ecx
.raa1:

		dec esi			; Byte pointers -> word pointers
		dec edi
		mov al,cl		; Tail bits, in case we go to .r_tiny
		cmp ecx,2
		jb .r_tiny		; Only 0 or 1 bytes left
		test edi,2		; The remaining range ends at EDI+2
		jnz .raa2		; ... which is already dword aligned
		movsw
		sub ecx,2
.raa2:

		; Bulk copy
		sub esi,2		; Word pointers -> dword pointers
		sub edi,2
		mov al,cl		; Save low bits
		shr ecx,2
		rep movsd

		; Final alignment
.r_final:
		add esi,2		; Dword pointers -> word pointers
		add edi,2
		test al,2		; Trailing word?
		jz .rab2
		movsw
.rab2:
.r_tiny:
		inc esi			; Word pointers -> byte pointers
		inc edi
		test al,1		; Trailing byte?
		jz .rab1
		movsb
.rab1:
		cld			; Leave DF clear for the caller
		jmp short .done

.bzero:
		xor eax,eax		; Fill value

		; Initial alignment: bring EDI up to a dword boundary
		test edi,1
		jz .zaa1
		stosb
		dec ecx
.zaa1:

		mov bl,cl		; Tail bits, in case we go to .z_tiny
		cmp ecx,2
		jb .z_tiny		; Only 0 or 1 bytes left
		test edi,2		; EDI is even at this point
		jz .zaa2
		stosw
		sub ecx,2
.zaa2:

		; Bulk
		mov bl,cl		; Save low bits
		shr ecx,2
		rep stosd

		test bl,2		; Trailing word?
		jz .zab2
		stosw
.zab2:
.z_tiny:
		test bl,1		; Trailing byte?
		jz .zab1
		stosb
.zab1:
		jmp .done		; May be out of short-jump range
|
|
|
|
;
|
|
; shuffle_and_boot:
|
|
;
|
|
; This routine is used to shuffle memory around, followed by
|
|
; invoking an entry point somewhere in low memory. This routine
|
|
; can clobber any memory outside the bcopy special area.
|
|
;
|
|
; IMPORTANT: This routine does not set up any registers.
|
|
; It is the responsibility of the caller to generate an appropriate entry
|
|
; stub; *especially* when going to real mode.
|
|
;
|
|
; Inputs:
|
|
; ESI -> Pointer to list of (dst, src, len) triples(*)
|
|
; EDI -> Pointer to safe area for list + shuffler
|
|
; (must not overlap this code nor the RM stack)
|
|
; ECX -> Byte count of list area (for initial copy)
|
|
;
|
|
; If src == -1: then the memory pointed to by (dst, len) is bzeroed;
|
|
; this is handled inside the bcopy routine.
|
|
;
|
|
; If len == 0: this marks the end of the list; dst indicates
|
|
; the entry point and src the mode (0 = rm, nonzero = pm)
|
|
;
|
|
; (*) dst, src, and len are four bytes each
|
|
;
|
|
; do_raw_shuffle_and_boot is the same entry point, but with a C ABI:
|
|
; do_raw_shuffle_and_boot(safearea, descriptors, bytecount)
|
|
;
|
|
; Register-argument wrapper: move the first two arguments into the
; registers pm_shuffle expects, then fall straight through into it.
; ECX is passed on untouched.
		global do_raw_shuffle_and_boot
do_raw_shuffle_and_boot:
		mov esi,edx		; ESI <- second argument
		mov edi,eax		; EDI <- first argument

pm_shuffle:
		cli			; Interrupts stay off from here on
		lea edx,[edi+ecx+15]	; First byte past the list copy ...
		and edx,~15		; ... rounded up to 16 for the GDT
		mov ebx,edi		; EBX <- where the list is about to land
		call pm_bcopy		; Copy the list; EBX and EDX survive

		; Relocate this code to EDX and continue running there
		mov edi,edx		; Absolute target address
		mov esi,__bcopyxx_start	; Absolute source address
		sub edx,esi		; EDX <- address delta
		lea eax,[edx+.safe]	; Resume point inside the copy
		mov ecx,__bcopyxx_dwords
		rep movsd
		jmp eax			; Continue in the relocated copy
.safe:
		; Stack top sits bcopyxx_stack bytes past the relocated
		; __bcopyxx_end
		lea esp,[edx+bcopyxx_stack+__bcopyxx_end]
		add edx,bcopy_gdt	; EDX <- relocated GDT
		mov [edx+2],edx		; Patch the GDT's pointer to itself
		lgdt [edx]		; Switch to the relocated GDT

		; Walk the descriptor list; each entry is 12 bytes
.loop:
		mov ecx,[ebx+8]		; len
		mov esi,[ebx+4]		; src
		mov edi,[ebx]		; dst
		add ebx,12
		jecxz .done		; len == 0 terminates the list
		call pm_bcopy
		jmp .loop
.done:
		; Here ECX == 0, EDI = entry point, ESI = mode
		lidt [edx+RM_IDT_ptr-bcopy_gdt]	; RM-like IDT
		push ecx		; Zero, popped into EFLAGS below
		test esi,esi
		jz pm_shuffle_16	; Mode 0: 16-bit entry point
		popfd			; Clean the flags
		jmp edi			; Protected mode entry

		; 16-bit entry point: use EDI as the base address of the
		; 16-bit code and data segments.  EDX still points to the GDT.
pm_shuffle_16:
		mov eax,edi
		mov [edx+PM_DS16+2],ax	; Base bits 0-15
		mov [edx+PM_CS16+2],ax
		shr eax,16
		mov [edx+PM_CS16+4],al	; Base bits 16-23
		mov [edx+PM_DS16+4],al
		mov [edx+PM_CS16+7],ah	; Base bits 24-31
		mov [edx+PM_DS16+7],ah
		; EAX <- CR0 with bit 0 cleared.  It is not written back
		; to CR0 in this routine; presumably the code at the entry
		; point does that -- confirm against the caller's stub.
		mov eax,cr0
		and al,~1
		popfd			; Clean the flags
		; Nothing from here on may modify the flags
		mov dx,PM_DS16
		mov ds,edx
		mov es,edx
		mov fs,edx
		mov gs,edx
		mov ss,edx
		jmp PM_CS16:0
|
|
|
|
; Data that travels with the relocated shuffler code
		section .bcopyxx.data

; desc NAME
;	Starts a GDT entry: emits the label bcopy_gdt.NAME and defines
;	PM_NAME as that entry's byte offset from bcopy_gdt.
%macro desc 1
bcopy_gdt.%1:
PM_%1		equ bcopy_gdt.%1-bcopy_gdt
%endmacro

		alignz 16
bcopy_gdt:
		; Slot 0 (the null descriptor) doubles as the operand
		; for the LGDT instruction: limit, then base
		dw bcopy_gdt_size-1
		dd bcopy_gdt
		dw 0

		; A TSS descriptor, present only to keep Intel VT happy;
		; it is unhappy about anything that doesn't smell like a
		; full-blown 32-bit OS.
	desc TSS
		dw 104-1		; 08h 32-bit task state segment
		dw DummyTSS
		dd 0x00008900		; present, dpl 0, 104 bytes @DummyTSS

	desc CS16
		dd 0x0000ffff		; 10h Code segment, use16, readable,
		dd 0x00009b00		; present, dpl 0, cover 64K
	desc DS16
		dd 0x0000ffff		; 18h Data segment, use16, read/write,
		dd 0x00009300		; present, dpl 0, cover 64K
	desc CS32
		dd 0x0000ffff		; 20h Code segment, use32, readable,
		dd 0x00cf9b00		; present, dpl 0, cover all 4G
	desc DS32
		dd 0x0000ffff		; 28h Data segment, use32, read/write,
		dd 0x00cf9300		; present, dpl 0, cover all 4G

bcopy_gdt_size:	equ $-bcopy_gdt
|
|
;
; Address used for the dummy task state segment.  Nothing should ever
; actually access it, but just in case something does, it points at a
; chunk of memory that has a chance of not being used for anything real.
;
DummyTSS	equ 0x580

		; Operand for LIDT: 16-bit limit followed by 32-bit base
		align 4
RM_IDT_ptr:	dw 0xFFFF		; Length (nonsense, but matches CPU)
		dd 0			; Offset

bcopyxx_stack	equ 128			; Bytes of stack wanted after relocating
|
|
|
|
; Publish __bcopyxx_len (defined outside this file) as a 32-bit
; read-only variable
		section .rodata
		extern __bcopyxx_len
		global __syslinux_shuffler_size
		align 4
__syslinux_shuffler_size:
		dd __bcopyxx_len

		; Switch back to 16-bit code for whatever follows
		bits 16
		section .text16
|