aboutsummaryrefslogtreecommitdiffstats
path: root/core/bcopyxx.inc
blob: 3658f09749da010f4691f62db7f8b5bc74aa87ef (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
;; -----------------------------------------------------------------------
;;
;;   Copyright 1994-2009 H. Peter Anvin - All Rights Reserved
;;   Copyright 2009-2010 Intel Corporation; author: H. Peter Anvin
;;
;;   This program is free software; you can redistribute it and/or modify
;;   it under the terms of the GNU General Public License as published by
;;   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
;;   Boston MA 02111-1307, USA; either version 2 of the License, or
;;   (at your option) any later version; incorporated herein by reference.
;;
;; -----------------------------------------------------------------------

;;
;; bcopy32xx.inc
;;


;
; 32-bit bcopy routine
;
; This is the actual 32-bit portion of the bcopy and shuffle and boot
; routines.  ALL THIS CODE NEEDS TO BE POSITION-INDEPENDENT, with the
; sole exception being the actual relocation code at the beginning of
; pm_shuffle_boot.
;
; It also really needs to live all in a single segment, for the
; address calculcations to actually work.
;

		bits 32
		section .bcopyxx.text
		align 16
;
; pm_bcopy:
;
;	This is the protected-mode core of the "bcopy" routine.
;	Try to do aligned transfers; if the src and dst are relatively
;	misaligned, align the dst.
;
;	ECX is guaranteed to not be zero on entry.
;
;	Clobbers ESI, EDI, ECX.
;

pm_bcopy:
		push ebx
		push edx
		push eax

		cmp esi,-1
		je .bzero

		cmp esi,edi		; If source < destination, we might
		jb .reverse		; have to copy backwards

.forward:
		; Initial alignment
		mov edx,edi
		shr edx,1
		jnc .faa1
		movsb
		dec ecx
.faa1:
		mov al,cl
		cmp ecx,2
		jb .f_tiny

		shr edx,1
		jnc .faa2
		movsw
		sub ecx,2
.faa2:

		; Bulk transfer
		mov al,cl		; Save low bits
		shr ecx,2		; Convert to dwords
		rep movsd		; Do our business
		; At this point ecx == 0

		test al,2
		jz .fab2
		movsw
.fab2:
.f_tiny:
		test al,1
		jz .fab1
		movsb
.fab1:
.done:
		pop eax
		pop edx
		pop ebx
		ret

.reverse:
		lea eax,[esi+ecx-1]	; Point to final byte
		cmp edi,eax
		ja .forward		; No overlap, do forward copy

		std			; Reverse copy
		lea edi,[edi+ecx-1]
		mov esi,eax

		; Initial alignment
		mov edx,edi
		shr edx,1
		jc .raa1
		movsb
		dec ecx
.raa1:

		dec esi
		dec edi
		mov al,cl
		cmp ecx,2
		jb .r_tiny
		shr edx,1
		jc .raa2
		movsw
		sub ecx,2
.raa2:

		; Bulk copy
		sub esi,2
		sub edi,2
		mov al,cl		; Save low bits
		shr ecx,2
		rep movsd

		; Final alignment
.r_final:
		add esi,2
		add edi,2
		test al,2
		jz .rab2
		movsw
.rab2:
.r_tiny:
		inc esi
		inc edi
		test al,1
		jz .rab1
		movsb
.rab1:
		cld
		jmp short .done

.bzero:
		xor eax,eax

		; Initial alignment
		mov edx,edi
		shr edx,1
		jnc .zaa1
		stosb
		dec ecx
.zaa1:

		mov bl,cl
		cmp ecx,2
		jb .z_tiny
		shr edx,1
		jnc .zaa2
		stosw
		sub ecx,2
.zaa2:

		; Bulk
		mov bl,cl		; Save low bits
		shr ecx,2
		rep stosd

		test bl,2
		jz .zab2
		stosw
.zab2:
.z_tiny:
		test bl,1
		jz .zab1
		stosb
.zab1:
		jmp short .done

;
; shuffle_and_boot:
;
; This routine is used to shuffle memory around, followed by
; invoking an entry point somewhere in low memory.  This routine
; can clobber any memory outside the bcopy special area.
;
; IMPORTANT: This routine does not set up any registers.
; It is the responsibility of the caller to generate an appropriate entry
; stub; *especially* when going to real mode.
;
; Inputs:
;	ESI		-> Pointer to list of (dst, src, len) pairs(*)
;	EDI		-> Pointer to safe area for list + shuffler
;			   (must not overlap this code nor the RM stack)
;	ECX		-> Byte count of list area (for initial copy)
;
;     If src == -1: then the memory pointed to by (dst, len) is bzeroed;
;		    this is handled inside the bcopy routine.
;
;     If len == 0:  this marks the end of the list; dst indicates
;		    the entry point and src the mode (0 = pm, 1 = rm)
;
;     (*) dst, src, and len are four bytes each
;
; do_raw_shuffle_and_boot is the same entry point, but with a C ABI:
; do_raw_shuffle_and_boot(safearea, descriptors, bytecount)
;
		global do_raw_shuffle_and_boot
do_raw_shuffle_and_boot:
		mov edi,eax
		mov esi,edx

pm_shuffle:
		cli			; End interrupt service (for good)
		mov ebx,edi		; EBX <- descriptor list
		lea edx,[edi+ecx+15]	; EDX <- where to relocate our code to
		and edx,~15		; Align 16 to benefit the GDT
		call pm_bcopy
		mov esi,__bcopyxx_start	; Absolute source address
		mov edi,edx		; Absolute target address
		sub edx,esi		; EDX <- address delta
		mov ecx,__bcopyxx_dwords
		lea eax,[edx+.safe]	; Resume point
		; Relocate this code
		rep movsd
		jmp eax			; Jump to safe location
.safe:
		; Give ourselves a safe stack
		lea esp,[edx+bcopyxx_stack+__bcopyxx_end]
		add edx,bcopy_gdt	; EDX <- new GDT
		mov [edx+2],edx		; GDT self-pointer
		lgdt [edx]		; Switch to local GDT

		; Now for the actual shuffling...
.loop:
		mov edi,[ebx]
		mov esi,[ebx+4]
		mov ecx,[ebx+8]
		add ebx,12
		jecxz .done
		call pm_bcopy
		jmp .loop
.done:
		lidt [edx+RM_IDT_ptr-bcopy_gdt]	; RM-like IDT
		push ecx		; == 0, for cleaning the flags register
		and esi,esi
		jz pm_shuffle_16
		popfd			; Clean the flags
		jmp edi			; Protected mode entry

		; We have a 16-bit entry point, so we need to return
		; to 16-bit mode.  Note: EDX already points to the GDT.
pm_shuffle_16:
		mov eax,edi
		mov [edx+PM_CS16+2],ax
		mov [edx+PM_DS16+2],ax
		shr eax,16
		mov [edx+PM_CS16+4],al
		mov [edx+PM_CS16+7],ah
		mov [edx+PM_DS16+4],al
		mov [edx+PM_DS16+7],ah
		mov eax,cr0
		and al,~1
		popfd			; Clean the flags
		; No flag-changing instructions below...
		mov dx,PM_DS16
		mov ds,edx
		mov es,edx
		mov fs,edx
		mov gs,edx
		mov ss,edx
		jmp PM_CS16:0

		section	.bcopyxx.data

		alignz 16
; GDT descriptor entry
%macro desc 1
bcopy_gdt.%1:
PM_%1		equ bcopy_gdt.%1-bcopy_gdt
%endmacro

bcopy_gdt:
		dw bcopy_gdt_size-1	; Null descriptor - contains GDT
		dd bcopy_gdt		; pointer for LGDT instruction
		dw 0

		; TSS segment to keep Intel VT happy.  Intel VT is
		; unhappy about anything that doesn't smell like a
		; full-blown 32-bit OS.
	desc TSS
		dw 104-1, DummyTSS	; 08h 32-bit task state segment
		dd 00008900h		; present, dpl 0, 104 bytes @DummyTSS

	desc CS16
		dd 0000ffffh		; 10h Code segment, use16, readable,
		dd 00009b00h		; present, dpl 0, cover 64K
	desc DS16
		dd 0000ffffh		; 18h Data segment, use16, read/write,
		dd 00009300h		; present, dpl 0, cover 64K
	desc CS32
		dd 0000ffffh		; 20h Code segment, use32, readable,
		dd 00cf9b00h		; present, dpl 0, cover all 4G
	desc DS32
		dd 0000ffffh		; 28h Data segment, use32, read/write,
		dd 00cf9300h		; present, dpl 0, cover all 4G

bcopy_gdt_size:	equ $-bcopy_gdt
;
; Space for a dummy task state segment.  It should never be actually
; accessed, but just in case it is, point to a chunk of memory that
; has a chance to not be used for anything real...
;
DummyTSS	equ 0x580

		align 4
RM_IDT_ptr:	dw 0FFFFh		; Length (nonsense, but matches CPU)
		dd 0			; Offset

bcopyxx_stack	equ 128			; We want this much stack

		section .rodata
		global __syslinux_shuffler_size
		extern __bcopyxx_len
		align 4
__syslinux_shuffler_size:
		dd __bcopyxx_len

		bits 16
		section .text16