vserver 1.9.3
[linux-2.6.git] / arch / arm / boot / compressed / head.S
1 /*
2  *  linux/arch/arm/boot/compressed/head.S
3  *
4  *  Copyright (C) 1996-2002 Russell King
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10 #include <linux/config.h>
11 #include <linux/linkage.h>
12
13 /*
14  * Debugging stuff
15  *
16  * Note that these macros must not contain any code which is not
17  * 100% relocatable.  Any attempt to do so will result in a crash.
18  * Please select one of the following when turning on debugging.
19  */
20 #ifdef DEBUG
21 #if defined(CONFIG_DEBUG_DC21285_PORT)
22                 .macro  loadsp, rb
23                 mov     \rb, #0x42000000
24                 .endm
25                 .macro  writeb, rb
26                 str     \rb, [r3, #0x160]
27                 .endm
28 #elif defined(CONFIG_DEBUG_ICEDCC)
29                 .macro  loadsp, rb
30                 .endm
31                 .macro writeb, rb
32                 mcr     p14, 0, \rb, c0, c1, 0
33                 .endm
34 #elif defined(CONFIG_FOOTBRIDGE)
35                 .macro  loadsp, rb
36                 mov     \rb, #0x7c000000
37                 .endm
38                 .macro  writeb, rb
39                 strb    \rb, [r3, #0x3f8]
40                 .endm
41 #elif defined(CONFIG_ARCH_RPC)
42                 .macro  loadsp, rb
43                 mov     \rb, #0x03000000
44                 orr     \rb, \rb, #0x00010000
45                 .endm
46                 .macro  writeb, rb
47                 strb    \rb, [r3, #0x3f8 << 2]
48                 .endm
49 #elif defined(CONFIG_ARCH_INTEGRATOR)
50                 .macro  loadsp, rb
51                 mov     \rb, #0x16000000
52                 .endm
53                 .macro  writeb, rb
54                 strb    \rb, [r3, #0]
55                 .endm
56 #elif defined(CONFIG_ARCH_PXA) /* Xscale-type */
57                 .macro  loadsp, rb
58                 mov     \rb, #0x40000000
59                 orr     \rb, \rb, #0x00100000
60                 .endm
61                 .macro  writeb, rb
62                 strb    \rb, [r3, #0]
63                 .endm
64 #elif defined(CONFIG_ARCH_SA1100)
65                 .macro  loadsp, rb
66                 mov     \rb, #0x80000000        @ physical base address
67 #  if defined(CONFIG_DEBUG_LL_SER3)
68                 add     \rb, \rb, #0x00050000   @ Ser3
69 #  else
70                 add     \rb, \rb, #0x00010000   @ Ser1
71 #  endif
72                 .endm
73                 .macro  writeb, rb
74                 str     \rb, [r3, #0x14]        @ UTDR
75                 .endm
76 #elif defined(CONFIG_ARCH_IXP4XX)
77                 .macro  loadsp, rb
78                 mov     \rb, #0xc8000000
79                 .endm
80                 .macro  writeb, rb
81                 str     \rb, [r3, #0]
82 #elif defined(CONFIG_ARCH_IXP2000)
83                 .macro  loadsp, rb
84                 mov     \rb, #0xc0000000
85                 orr     \rb, \rb, #0x00030000
86                 .endm
87                 .macro  writeb, rb
88                 str     \rb, [r3, #0]
89                 .endm
90 #elif defined(CONFIG_ARCH_LH7A40X)
91                 .macro  loadsp, rb
92                 ldr     \rb, =0x80000700        @ UART2 UARTBASE
93                 .endm
94                 .macro  writeb, rb
95                 strb    \rb, [r3, #0]
96                 .endm
97 #elif defined(CONFIG_ARCH_OMAP)
98                 .macro  loadsp, rb
99                 mov     \rb, #0xff000000        @ physical base address
100                 add     \rb, \rb, #0x00fb0000
101 #if defined(CONFIG_OMAP_LL_DEBUG_UART2) || defined(CONFIG_OMAP_LL_DEBUG_UART3)
102                 add     \rb, \rb, #0x00000800
103 #endif
104 #ifdef CONFIG_OMAP_LL_DEBUG_UART3
105                 add     \rb, \rb, #0x00009000
106 #endif
107                 .endm
108                 .macro  writeb, rb
109                 strb    \rb, [r3]
110                 .endm
111 #elif defined(CONFIG_ARCH_IOP331)
112                 .macro loadsp, rb
113                 mov     \rb, #0xff000000
114                 orr     \rb, \rb, #0x00ff0000
115                 orr     \rb, \rb, #0x0000f700   @ location of the UART
116                 .endm
117                 .macro  writeb, rb
118                 str     \rb, [r3, #0]
119                 .endm
120 #else
121 #error no serial architecture defined
122 #endif
123 #endif
124
125                 .macro  kputc,val
126                 mov     r0, \val
127                 bl      putc
128                 .endm
129
130                 .macro  kphex,val,len
131                 mov     r0, \val
132                 mov     r1, #\len
133                 bl      phex
134                 .endm
135
136                 .macro  debug_reloc_start
137 #ifdef DEBUG
138                 kputc   #'\n'
139                 kphex   r6, 8           /* processor id */
140                 kputc   #':'
141                 kphex   r7, 8           /* architecture id */
142                 kputc   #':'
143                 mrc     p15, 0, r0, c1, c0
144                 kphex   r0, 8           /* control reg */
145                 kputc   #'\n'
146                 kphex   r5, 8           /* decompressed kernel start */
147                 kputc   #'-'
148                 kphex   r8, 8           /* decompressed kernel end  */
149                 kputc   #'>'
150                 kphex   r4, 8           /* kernel execution address */
151                 kputc   #'\n'
152 #endif
153                 .endm
154
155                 .macro  debug_reloc_end
156 #ifdef DEBUG
157                 kphex   r5, 8           /* end of kernel */
158                 kputc   #'\n'
159                 mov     r0, r4
160                 bl      memdump         /* dump 256 bytes at start of kernel */
161 #endif
162                 .endm
163
164                 .section ".start", #alloc, #execinstr
165 /*
166  * sort out different calling conventions
167  */
168                 .align
169 start:
170                 .type   start,#function
171                 .rept   8
172                 mov     r0, r0
173                 .endr
174
175                 b       1f
176                 .word   0x016f2818              @ Magic numbers to help the loader
177                 .word   start                   @ absolute load/run zImage address
178                 .word   _edata                  @ zImage end address
179 1:              mov     r7, r1                  @ save architecture ID
180                 mov     r8, #0                  @ save r0
181
182 #ifndef __ARM_ARCH_2__
183                 /*
184                  * Booting from Angel - need to enter SVC mode and disable
185                  * FIQs/IRQs (numeric definitions from angel arm.h source).
186                  * We only do this if we were in user mode on entry.
187                  */
188                 mrs     r2, cpsr                @ get current mode
189                 tst     r2, #3                  @ not user?
190                 bne     not_angel
191                 mov     r0, #0x17               @ angel_SWIreason_EnterSVC
192                 swi     0x123456                @ angel_SWI_ARM
193 not_angel:
194                 mrs     r2, cpsr                @ turn off interrupts to
195                 orr     r2, r2, #0xc0           @ prevent angel from running
196                 msr     cpsr_c, r2
197 #else
198                 teqp    pc, #0x0c000003         @ turn off interrupts
199 #endif
200
201                 /*
202                  * Note that some cache flushing and other stuff may
203                  * be needed here - is there an Angel SWI call for this?
204                  */
205
206                 /*
207                  * some architecture specific code can be inserted
208                  * by the linker here, but it should preserve r7 and r8.
209                  */
210
211                 .text
212                 adr     r0, LC0
213                 ldmia   r0, {r1, r2, r3, r4, r5, r6, ip, sp}
214                 subs    r0, r0, r1              @ calculate the delta offset
215
216                                                 @ if delta is zero, we are
217                 beq     not_relocated           @ running at the address we
218                                                 @ were linked at.
219
220                 /*
221                  * We're running at a different address.  We need to fix
222                  * up various pointers:
223                  *   r5 - zImage base address
224                  *   r6 - GOT start
225                  *   ip - GOT end
226                  */
227                 add     r5, r5, r0
228                 add     r6, r6, r0
229                 add     ip, ip, r0
230
231 #ifndef CONFIG_ZBOOT_ROM
232                 /*
233                  * If we're running fully PIC === CONFIG_ZBOOT_ROM = n,
234                  * we need to fix up pointers into the BSS region.
235                  *   r2 - BSS start
236                  *   r3 - BSS end
237                  *   sp - stack pointer
238                  */
239                 add     r2, r2, r0
240                 add     r3, r3, r0
241                 add     sp, sp, r0
242
243                 /*
244                  * Relocate all entries in the GOT table.
245                  */
246 1:              ldr     r1, [r6, #0]            @ relocate entries in the GOT
247                 add     r1, r1, r0              @ table.  This fixes up the
248                 str     r1, [r6], #4            @ C references.
249                 cmp     r6, ip
250                 blo     1b
251 #else
252
253                 /*
254                  * Relocate entries in the GOT table.  We only relocate
255                  * the entries that are outside the (relocated) BSS region.
256                  */
257 1:              ldr     r1, [r6, #0]            @ relocate entries in the GOT
258                 cmp     r1, r2                  @ entry < bss_start ||
259                 cmphs   r3, r1                  @ _end < entry
260                 addlo   r1, r1, r0              @ table.  This fixes up the
261                 str     r1, [r6], #4            @ C references.
262                 cmp     r6, ip
263                 blo     1b
264 #endif
265
266 not_relocated:  mov     r0, #0
267 1:              str     r0, [r2], #4            @ clear bss
268                 str     r0, [r2], #4
269                 str     r0, [r2], #4
270                 str     r0, [r2], #4
271                 cmp     r2, r3
272                 blo     1b
273
274                 /*
275                  * The C runtime environment should now be setup
276                  * sufficiently.  Turn the cache on, set up some
277                  * pointers, and start decompressing.
278                  */
279                 bl      cache_on
280
281                 mov     r1, sp                  @ malloc space above stack
282                 add     r2, sp, #0x10000        @ 64k max
283
284 /*
285  * Check to see if we will overwrite ourselves.
286  *   r4 = final kernel address
287  *   r5 = start of this image
288  *   r2 = end of malloc space (and therefore this image)
289  * We basically want:
290  *   r4 >= r2 -> OK
291  *   r4 + image length <= r5 -> OK
292  */
293                 cmp     r4, r2
294                 bhs     wont_overwrite
295                 add     r0, r4, #4096*1024      @ 4MB largest kernel size
296                 cmp     r0, r5
297                 bls     wont_overwrite
298
299                 mov     r5, r2                  @ decompress after malloc space
300                 mov     r0, r5
301                 mov     r3, r7
302                 bl      decompress_kernel
303
304                 add     r0, r0, #127
305                 bic     r0, r0, #127            @ align the kernel length
306 /*
307  * r0     = decompressed kernel length
308  * r1-r3  = unused
309  * r4     = kernel execution address
310  * r5     = decompressed kernel start
311  * r6     = processor ID
312  * r7     = architecture ID
313  * r8-r14 = unused
314  */
315                 add     r1, r5, r0              @ end of decompressed kernel
316                 adr     r2, reloc_start
317                 ldr     r3, LC1
318                 add     r3, r2, r3
319 1:              ldmia   r2!, {r8 - r13}         @ copy relocation code
320                 stmia   r1!, {r8 - r13}
321                 ldmia   r2!, {r8 - r13}
322                 stmia   r1!, {r8 - r13}
323                 cmp     r2, r3
324                 blo     1b
325
326                 bl      cache_clean_flush
327                 add     pc, r5, r0              @ call relocation code
328
329 /*
330  * We're not in danger of overwriting ourselves.  Do this the simple way.
331  *
332  * r4     = kernel execution address
333  * r7     = architecture ID
334  */
335 wont_overwrite: mov     r0, r4
336                 mov     r3, r7
337                 bl      decompress_kernel
338                 b       call_kernel
339
340                 .type   LC0, #object
341 LC0:            .word   LC0                     @ r1
342                 .word   __bss_start             @ r2
343                 .word   _end                    @ r3
344                 .word   zreladdr                @ r4
345                 .word   _start                  @ r5
346                 .word   _got_start              @ r6
347                 .word   _got_end                @ ip
348                 .word   user_stack+4096         @ sp
349 LC1:            .word   reloc_end - reloc_start
350                 .size   LC0, . - LC0
351
352 #ifdef CONFIG_ARCH_RPC
353                 .globl  params
354 params:         ldr     r0, =params_phys
355                 mov     pc, lr
356                 .ltorg
357                 .align
358 #endif
359
360 /*
361  * Turn on the cache.  We need to setup some page tables so that we
362  * can have both the I and D caches on.
363  *
364  * We place the page tables 16k down from the kernel execution address,
365  * and we hope that nothing else is using it.  If we're using it, we
366  * will go pop!
367  *
368  * On entry,
369  *  r4 = kernel execution address
370  *  r6 = processor ID
371  *  r7 = architecture number
372  *  r8 = run-time address of "start"
373  * On exit,
374  *  r1, r2, r3, r8, r9, r12 corrupted
375  * This routine must preserve:
376  *  r4, r5, r6, r7
377  */
378                 .align  5
379 cache_on:       mov     r3, #8                  @ cache_on function
380                 b       call_cache_fn
381
382 __setup_mmu:    sub     r3, r4, #16384          @ Page directory size
383                 bic     r3, r3, #0xff           @ Align the pointer
384                 bic     r3, r3, #0x3f00
385 /*
386  * Initialise the page tables, turning on the cacheable and bufferable
387  * bits for the RAM area only.
388  */
389                 mov     r0, r3
390                 mov     r8, r0, lsr #18
391                 mov     r8, r8, lsl #18         @ start of RAM
392                 add     r9, r8, #0x10000000     @ a reasonable RAM size
393                 mov     r1, #0x12
394                 orr     r1, r1, #3 << 10
395                 add     r2, r3, #16384
396 1:              cmp     r1, r8                  @ if virt > start of RAM
397                 orrhs   r1, r1, #0x0c           @ set cacheable, bufferable
398                 cmp     r1, r9                  @ if virt > end of RAM
399                 bichs   r1, r1, #0x0c           @ clear cacheable, bufferable
400                 str     r1, [r0], #4            @ 1:1 mapping
401                 add     r1, r1, #1048576
402                 teq     r0, r2
403                 bne     1b
404 /*
405  * If ever we are running from Flash, then we surely want the cache
406  * to be enabled also for our execution instance...  We map 2MB of it
407  * so there is no map overlap problem for up to 1 MB compressed kernel.
408  * If the execution is in RAM then we would only be duplicating the above.
409  */
410                 mov     r1, #0x1e
411                 orr     r1, r1, #3 << 10
412                 mov     r2, pc, lsr #20
413                 orr     r1, r1, r2, lsl #20
414                 add     r0, r3, r2, lsl #2
415                 str     r1, [r0], #4
416                 add     r1, r1, #1048576
417                 str     r1, [r0]
418                 mov     pc, lr
419
420 __armv4_cache_on:
421                 mov     r12, lr
422                 bl      __setup_mmu
423                 mov     r0, #0
424                 mcr     p15, 0, r0, c7, c10, 4  @ drain write buffer
425                 mcr     p15, 0, r0, c8, c7, 0   @ flush I,D TLBs
426                 mrc     p15, 0, r0, c1, c0, 0   @ read control reg
427                 orr     r0, r0, #0x5000         @ I-cache enable, RR cache replacement
428                 orr     r0, r0, #0x0030
429                 bl      __common_cache_on
430                 mov     r0, #0
431                 mcr     p15, 0, r0, c8, c7, 0   @ flush I,D TLBs
432                 mov     pc, r12
433
434 __arm6_cache_on:
435                 mov     r12, lr
436                 bl      __setup_mmu
437                 mov     r0, #0
438                 mcr     p15, 0, r0, c7, c0, 0   @ invalidate whole cache v3
439                 mcr     p15, 0, r0, c5, c0, 0   @ invalidate whole TLB v3
440                 mov     r0, #0x30
441                 bl      __common_cache_on
442                 mov     r0, #0
443                 mcr     p15, 0, r0, c5, c0, 0   @ invalidate whole TLB v3
444                 mov     pc, r12
445
446 __common_cache_on:
447 #ifndef DEBUG
448                 orr     r0, r0, #0x000d         @ Write buffer, mmu
449 #endif
450                 mov     r1, #-1
451                 mcr     p15, 0, r3, c2, c0, 0   @ load page table pointer
452                 mcr     p15, 0, r1, c3, c0, 0   @ load domain access control
453                 mcr     p15, 0, r0, c1, c0, 0   @ load control register
454                 mov     pc, lr
455
456 /*
457  * All code following this line is relocatable.  It is relocated by
458  * the above code to the end of the decompressed kernel image and
459  * executed there.  During this time, we have no stacks.
460  *
461  * r0     = decompressed kernel length
462  * r1-r3  = unused
463  * r4     = kernel execution address
464  * r5     = decompressed kernel start
465  * r6     = processor ID
466  * r7     = architecture ID
467  * r8-r14 = unused
468  */
469                 .align  5
470 reloc_start:    add     r8, r5, r0
471                 debug_reloc_start
472                 mov     r1, r4
473 1:
474                 .rept   4
475                 ldmia   r5!, {r0, r2, r3, r9 - r13}     @ relocate kernel
476                 stmia   r1!, {r0, r2, r3, r9 - r13}
477                 .endr
478
479                 cmp     r5, r8
480                 blo     1b
481                 debug_reloc_end
482
483 call_kernel:    bl      cache_clean_flush
484                 bl      cache_off
485                 mov     r0, #0
486                 mov     r1, r7                  @ restore architecture number
487                 mov     pc, r4                  @ call kernel
488
489 /*
490  * Here follow the relocatable cache support functions for the
491  * various processors.  This is a generic hook for locating an
492  * entry and jumping to an instruction at the specified offset
493  * from the start of the block.  Please note this is all position
494  * independent code.
495  *
496  *  r1  = corrupted
497  *  r2  = corrupted
498  *  r3  = block offset
499  *  r6  = corrupted
500  *  r12 = corrupted
501  */
502
503 call_cache_fn:  adr     r12, proc_types
504                 mrc     p15, 0, r6, c0, c0      @ get processor ID
505 1:              ldr     r1, [r12, #0]           @ get value
506                 ldr     r2, [r12, #4]           @ get mask
507                 eor     r1, r1, r6              @ (real ^ match)
508                 tst     r1, r2                  @       & mask
509                 addeq   pc, r12, r3             @ call cache function
510                 add     r12, r12, #4*5
511                 b       1b
512
513 /*
514  * Table for cache operations.  This is basically:
515  *   - CPU ID match
516  *   - CPU ID mask
517  *   - 'cache on' method instruction
518  *   - 'cache off' method instruction
519  *   - 'cache flush' method instruction
520  *
521  * We match an entry using: ((real_id ^ match) & mask) == 0
522  *
523  * Writethrough caches generally only need 'on' and 'off'
524  * methods.  Writeback caches _must_ have the flush method
525  * defined.
526  */
527                 .type   proc_types,#object
528 proc_types:
529                 .word   0x41560600              @ ARM6/610
530                 .word   0xffffffe0
531                 b       __arm6_cache_off        @ works, but slow
532                 b       __arm6_cache_off
533                 mov     pc, lr
534 @               b       __arm6_cache_on         @ untested
535 @               b       __arm6_cache_off
536 @               b       __armv3_cache_flush
537
538                 .word   0x00000000              @ old ARM ID
539                 .word   0x0000f000
540                 mov     pc, lr
541                 mov     pc, lr
542                 mov     pc, lr
543
544                 .word   0x41007000              @ ARM7/710
545                 .word   0xfff8fe00
546                 b       __arm7_cache_off
547                 b       __arm7_cache_off
548                 mov     pc, lr
549
550                 .word   0x41807200              @ ARM720T (writethrough)
551                 .word   0xffffff00
552                 b       __armv4_cache_on
553                 b       __armv4_cache_off
554                 mov     pc, lr
555
556                 .word   0x00007000              @ ARM7 IDs
557                 .word   0x0000f000
558                 mov     pc, lr
559                 mov     pc, lr
560                 mov     pc, lr
561
562                 @ Everything from here on will be the new ID system.
563
564                 .word   0x4401a100              @ sa110 / sa1100
565                 .word   0xffffffe0
566                 b       __armv4_cache_on
567                 b       __armv4_cache_off
568                 b       __armv4_cache_flush
569
570                 .word   0x6901b110              @ sa1110
571                 .word   0xfffffff0
572                 b       __armv4_cache_on
573                 b       __armv4_cache_off
574                 b       __armv4_cache_flush
575
576                 @ These match on the architecture ID
577
578                 .word   0x00020000              @ ARMv4T
579                 .word   0x000f0000
580                 b       __armv4_cache_on
581                 b       __armv4_cache_off
582                 b       __armv4_cache_flush
583
584                 .word   0x00050000              @ ARMv5TE
585                 .word   0x000f0000
586                 b       __armv4_cache_on
587                 b       __armv4_cache_off
588                 b       __armv4_cache_flush
589
590                 .word   0x00060000              @ ARMv5TEJ
591                 .word   0x000f0000
592                 b       __armv4_cache_on
593                 b       __armv4_cache_off
594                 b       __armv4_cache_flush
595
596                 .word   0x00070000              @ ARMv6
597                 .word   0x000f0000
598                 b       __armv4_cache_on
599                 b       __armv4_cache_off
600                 b       __armv6_cache_flush
601
602                 .word   0                       @ unrecognised type
603                 .word   0
604                 mov     pc, lr
605                 mov     pc, lr
606                 mov     pc, lr
607
608                 .size   proc_types, . - proc_types
609
610 /*
611  * Turn off the Cache and MMU.  ARMv3 does not support
612  * reading the control register, but ARMv4 does.
613  *
614  * On entry,  r6 = processor ID
615  * On exit,   r0, r1, r2, r3, r12 corrupted
616  * This routine must preserve: r4, r6, r7
617  */
618                 .align  5
619 cache_off:      mov     r3, #12                 @ cache_off function
620                 b       call_cache_fn
621
622 __armv4_cache_off:
623                 mrc     p15, 0, r0, c1, c0
624                 bic     r0, r0, #0x000d
625                 mcr     p15, 0, r0, c1, c0      @ turn MMU and cache off
626                 mov     r0, #0
627                 mcr     p15, 0, r0, c7, c7      @ invalidate whole cache v4
628                 mcr     p15, 0, r0, c8, c7      @ invalidate whole TLB v4
629                 mov     pc, lr
630
631 __arm6_cache_off:
632                 mov     r0, #0x00000030         @ ARM6 control reg.
633                 b       __armv3_cache_off
634
635 __arm7_cache_off:
636                 mov     r0, #0x00000070         @ ARM7 control reg.
637                 b       __armv3_cache_off
638
639 __armv3_cache_off:
640                 mcr     p15, 0, r0, c1, c0, 0   @ turn MMU and cache off
641                 mov     r0, #0
642                 mcr     p15, 0, r0, c7, c0, 0   @ invalidate whole cache v3
643                 mcr     p15, 0, r0, c5, c0, 0   @ invalidate whole TLB v3
644                 mov     pc, lr
645
646 /*
647  * Clean and flush the cache to maintain consistency.
648  *
649  * On entry,
650  *  r6 = processor ID
651  * On exit,
652  *  r1, r2, r3, r11, r12 corrupted
653  * This routine must preserve:
654  *  r0, r4, r5, r6, r7
655  */
656                 .align  5
657 cache_clean_flush:
658                 mov     r3, #16
659                 b       call_cache_fn
660
661 __armv6_cache_flush:
662                 mov     r1, #0
663                 mcr     p15, 0, r1, c7, c14, 0  @ clean+invalidate D
664                 mcr     p15, 0, r1, c7, c5, 0   @ invalidate I+BTB
665                 mcr     p15, 0, r1, c7, c15, 0  @ clean+invalidate unified
666                 mcr     p15, 0, r1, c7, c10, 4  @ drain WB
667                 mov     pc, lr
668
669 __armv4_cache_flush:
670                 mov     r2, #64*1024            @ default: 32K dcache size (*2)
671                 mov     r11, #32                @ default: 32 byte line size
672                 mrc     p15, 0, r3, c0, c0, 1   @ read cache type
673                 teq     r3, r6                  @ cache ID register present?
674                 beq     no_cache_id
675                 mov     r1, r3, lsr #18
676                 and     r1, r1, #7
677                 mov     r2, #1024
678                 mov     r2, r2, lsl r1          @ base dcache size *2
679                 tst     r3, #1 << 14            @ test M bit
680                 addne   r2, r2, r2, lsr #1      @ +1/2 size if M == 1
681                 mov     r3, r3, lsr #12
682                 and     r3, r3, #3
683                 mov     r11, #8
684                 mov     r11, r11, lsl r3        @ cache line size in bytes
685 no_cache_id:
686                 bic     r1, pc, #63             @ align to longest cache line
687                 add     r2, r1, r2
688 1:              ldr     r3, [r1], r11           @ s/w flush D cache
689                 teq     r1, r2
690                 bne     1b
691
692                 mcr     p15, 0, r1, c7, c5, 0   @ flush I cache
693                 mcr     p15, 0, r1, c7, c6, 0   @ flush D cache
694                 mcr     p15, 0, r1, c7, c10, 4  @ drain WB
695                 mov     pc, lr
696
697 __armv3_cache_flush:
698                 mov     r1, #0
699                 mcr     p15, 0, r0, c7, c0, 0   @ invalidate whole cache v3
700                 mov     pc, lr
701
702 /*
703  * Various debugging routines for printing hex characters and
704  * memory, which again must be relocatable.
705  */
706 #ifdef DEBUG
707                 .type   phexbuf,#object
708 phexbuf:        .space  12
709                 .size   phexbuf, . - phexbuf
710
711 phex:           adr     r3, phexbuf
712                 mov     r2, #0
713                 strb    r2, [r3, r1]
714 1:              subs    r1, r1, #1
715                 movmi   r0, r3
716                 bmi     puts
717                 and     r2, r0, #15
718                 mov     r0, r0, lsr #4
719                 cmp     r2, #10
720                 addge   r2, r2, #7
721                 add     r2, r2, #'0'
722                 strb    r2, [r3, r1]
723                 b       1b
724
725 puts:           loadsp  r3
726 1:              ldrb    r2, [r0], #1
727                 teq     r2, #0
728                 moveq   pc, lr
729 2:              writeb  r2
730                 mov     r1, #0x00020000
731 3:              subs    r1, r1, #1
732                 bne     3b
733                 teq     r2, #'\n'
734                 moveq   r2, #'\r'
735                 beq     2b
736                 teq     r0, #0
737                 bne     1b
738                 mov     pc, lr
739 putc:
740                 mov     r2, r0
741                 mov     r0, #0
742                 loadsp  r3
743                 b       2b
744
745 memdump:        mov     r12, r0
746                 mov     r10, lr
747                 mov     r11, #0
748 2:              mov     r0, r11, lsl #2
749                 add     r0, r0, r12
750                 mov     r1, #8
751                 bl      phex
752                 mov     r0, #':'
753                 bl      putc
754 1:              mov     r0, #' '
755                 bl      putc
756                 ldr     r0, [r12, r11, lsl #2]
757                 mov     r1, #8
758                 bl      phex
759                 and     r0, r11, #7
760                 teq     r0, #3
761                 moveq   r0, #' '
762                 bleq    putc
763                 and     r0, r11, #7
764                 add     r11, r11, #1
765                 teq     r0, #7
766                 bne     1b
767                 mov     r0, #'\n'
768                 bl      putc
769                 cmp     r11, #64
770                 blt     2b
771                 mov     pc, r10
772 #endif
773
774 reloc_end:
775
776                 .align
777                 .section ".stack", "w"
778 user_stack:     .space  4096