最近在使用TI的DRA726芯片。A15端需要访问图像,而图像是在外设空间的,用DMA拷贝到CACHE空间。

这样就带来了DMA与CACHE的一致性问题:DMA直接写内存,不会自动更新CACHE中的旧数据,所以需要在DMA完成之后,使图像缓冲区对应的数据CACHE失效(invalidate),A15才能读到最新的图像数据。

以这个A15核心为例,解析一下ARM的CACHE操作,涉及的文件有:cacheflush.h   cache-v7.S  proc-macros.S  proc-v7.S

内存是OS中非常厉害,非常复杂的一套系统,科学严谨,每一部分都需要研究几本书才能够彻底明白。

CACHE最基本的就是加速原理,依据就是程序的局部性原理。

CACHE实际实施起来,细节就非常复杂了,比如启动的过程中,如何建立CACHE,从直接访问内存到CACHE访问等等具体问题。

这次主要针对项目中遇到的CACHE一致性问题,借此机会和组员们一起分享。

/*

* arch/arm/include/asm/cacheflush.h

*

* Copyright (C) 1999-2002 Russell King

*

* This program is free software; you can redistribute it and/or modify

* it under the terms of the GNU General Public License version 2 as

* published by the Free Software Foundation.

*/

#ifndef _ASMARM_CACHEFLUSH_H

#define _ASMARM_CACHEFLUSH_H

#include

#include

#include

#include

#include

/*
 * CACHE_COLOUR(vaddr) - page index of vaddr inside its SHMLBA-sized window.
 *
 * The address is masked with (SHMLBA - 1) and shifted right by PAGE_SHIFT.
 * The argument is parenthesized so that a compound expression such as
 * "a | b" is masked as a whole instead of binding to '&' by precedence.
 */
#define CACHE_COLOUR(vaddr)	(((vaddr) & (SHMLBA - 1)) >> PAGE_SHIFT)

/*
 * This flag is used to indicate that the page pointed to by a pte is clean
 * and does not require cleaning before returning it to the user.
 *
 * It is an alias for the architecture-private page flag PG_arch_1.
 */
#define PG_dcache_clean PG_arch_1

/*

*MM Cache Management

*===================

*

*The arch/arm/mm/cache-*.S and arch/arm/mm/proc-*.S files

*implement these methods.

*

*Start addresses are inclusive and end addresses are exclusive;

*start addresses should be rounded down, end addresses up.

*

*See Documentation/cachetlb.txt for more information.

*Please note that the implementation of these, and the required

*effects are cache-type (VIVT/VIPT/PIPT) specific.

*

*flush_icache_all()

*

*Unconditionally clean and invalidate the entire icache.

*Currently only needed for cache-v6.S and cache-v7.S, see

*__flush_icache_all for the generic implementation.

*

*flush_kern_all()

*

*Unconditionally clean and invalidate the entire cache.

*

* flush_kern_louis()

*

* Flush data cache levels up to the level of unification

* inner shareable and invalidate the I-cache.

* Only needed from v7 onwards, falls back to flush_cache_all()

* for all other processor versions.

*

*flush_user_all()

*

*Clean and invalidate all user space cache entries

*before a change of page tables.

*

*flush_user_range(start, end, flags)

*

*Clean and invalidate a range of cache entries in the

*specified address space before a change of page tables.

*- start - user start address (inclusive, page aligned)

*- end - user end address (exclusive, page aligned)

*- flags - vma->vm_flags field

*

*coherent_kern_range(start, end)

*

*Ensure coherency between the Icache and the Dcache in the

*region described by start, end. If you have non-snooping

*Harvard caches, you need to implement this function.

*- start - virtual start address

*- end - virtual end address

*

*coherent_user_range(start, end)

*

*Ensure coherency between the Icache and the Dcache in the

*region described by start, end. If you have non-snooping

*Harvard caches, you need to implement this function.

*- start - virtual start address

*- end - virtual end address

*

*flush_kern_dcache_area(kaddr, size)

*

*Ensure that the data held in page is written back.

*- kaddr - page address

*- size - region size

*

*DMA Cache Coherency

*===================

*

*dma_flush_range(start, end)

*

*Clean and invalidate the specified virtual address range.

*- start - virtual start address

*- end - virtual end address

*/

/*
 * Table of cache maintenance entry points for one CPU/cache type.
 * Start addresses are inclusive and end addresses are exclusive.
 */
struct cpu_cache_fns {
	/* Clean and invalidate the entire I-cache. */
	void (*flush_icache_all)(void);
	/* Clean and invalidate the entire cache. */
	void (*flush_kern_all)(void);
	/* Flush D-cache up to the LoUIS and invalidate the I-cache. */
	void (*flush_kern_louis)(void);
	/* Clean and invalidate all user space cache entries. */
	void (*flush_user_all)(void);
	/* Clean and invalidate a user range; flags is vma->vm_flags. */
	void (*flush_user_range)(unsigned long start, unsigned long end,
				 unsigned int flags);

	/* Make I-cache and D-cache coherent over [start, end). */
	void (*coherent_kern_range)(unsigned long start, unsigned long end);
	/* Same for a user range; returns an int status. */
	int  (*coherent_user_range)(unsigned long start, unsigned long end);
	/* Ensure the data held at kaddr (size bytes) is written back. */
	void (*flush_kern_dcache_area)(void *kaddr, size_t size);

	/* DMA helpers: start address, region size, DMA direction. */
	void (*dma_map_area)(const void *start, size_t size, int dir);
	void (*dma_unmap_area)(const void *start, size_t size, int dir);

	/* Clean and invalidate the virtual address range [start, end). */
	void (*dma_flush_range)(const void *start, const void *end);
};

/*
 * Select the calling method.
 *
 * With MULTI_CACHE the __cpuc_* and dmac_* names resolve to members of the
 * cpu_cache function table; otherwise they are ordinary extern functions.
 * The macro names and their replacement text must be separated by
 * whitespace, otherwise the preprocessor defines one long, useless name.
 */
#ifdef MULTI_CACHE

extern struct cpu_cache_fns cpu_cache;

#define __cpuc_flush_icache_all		cpu_cache.flush_icache_all
#define __cpuc_flush_kern_all		cpu_cache.flush_kern_all
#define __cpuc_flush_kern_louis		cpu_cache.flush_kern_louis
#define __cpuc_flush_user_all		cpu_cache.flush_user_all
#define __cpuc_flush_user_range		cpu_cache.flush_user_range
#define __cpuc_coherent_kern_range	cpu_cache.coherent_kern_range
#define __cpuc_coherent_user_range	cpu_cache.coherent_user_range
#define __cpuc_flush_dcache_area	cpu_cache.flush_kern_dcache_area

/*
 * These are private to the dma-mapping API.  Do not use directly.
 * Their sole purpose is to ensure that data held in the cache
 * is visible to DMA, or data written by DMA to system memory is
 * visible to the CPU.
 */
#define dmac_map_area			cpu_cache.dma_map_area
#define dmac_unmap_area			cpu_cache.dma_unmap_area
#define dmac_flush_range		cpu_cache.dma_flush_range

#else

extern void __cpuc_flush_icache_all(void);
extern void __cpuc_flush_kern_all(void);
extern void __cpuc_flush_kern_louis(void);
extern void __cpuc_flush_user_all(void);
extern void __cpuc_flush_user_range(unsigned long, unsigned long, unsigned int);
extern void __cpuc_coherent_kern_range(unsigned long, unsigned long);
extern int  __cpuc_coherent_user_range(unsigned long, unsigned long);
extern void __cpuc_flush_dcache_area(void *, size_t);

/*
 * These are private to the dma-mapping API.  Do not use directly.
 * Their sole purpose is to ensure that data held in the cache
 * is visible to DMA, or data written by DMA to system memory is
 * visible to the CPU.
 */
extern void dmac_map_area(const void *, size_t, int);
extern void dmac_unmap_area(const void *, size_t, int);
extern void dmac_flush_range(const void *, const void *);

#endif

/*

* Copy user data from/to a page which is mapped into a different

* processes address space. Really, we want to allow our "user

* space" model to handle this.

*/

/*
 * Copy len bytes from src to dst on behalf of a page that is mapped into
 * another process's address space.  Defined elsewhere.
 */
extern void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
			      unsigned long uaddr, void *dst,
			      const void *src, unsigned long len);

/*
 * copy_from_user_page - plain memcpy() of len bytes from src to dst.
 *
 * vma, page and vaddr are accepted for interface symmetry with
 * copy_to_user_page() but are not used (and not evaluated).
 *
 * The continuation lines must follow each backslash directly: a blank line
 * after '\' ends the macro and leaves the body as stray file-scope code.
 */
#define copy_from_user_page(vma, page, vaddr, dst, src, len)	\
	do {							\
		memcpy(dst, src, len);				\
	} while (0)

/*

* Convert calls to our calling convention.

*/

/*
 * Invalidate I-cache: write 0 to CP15 c7, c5, 0.
 * The "mcr" mnemonic must be separated from its operands, and the macro
 * body must follow the backslash continuations without blank lines.
 */
#define __flush_icache_all_generic()					\
	asm("mcr	p15, 0, %0, c7, c5, 0"				\
	    : : "r" (0));

/* Invalidate I-cache inner shareable: write 0 to CP15 c7, c1, 0. */
#define __flush_icache_all_v7_smp()					\
	asm("mcr	p15, 0, %0, c7, c1, 0"				\
	    : : "r" (0));

/*
 * Optimized __flush_icache_all for the common cases. Note that UP ARMv7
 * will fall through to use __flush_icache_all_generic.
 *
 * __flush_icache_preferred names the implementation to call:
 *  - mixed v6/v7 kernels or SMP_ON_UP: the __cpuc_flush_icache_all entry
 *  - ARMv7 (or later) SMP:             inner-shareable invalidate
 *  - ARMv6 with erratum 411920:        the __cpuc_flush_icache_all entry
 *  - everything else:                  generic invalidate
 */
#if (defined(CONFIG_CPU_V7) && \
     (defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K))) || \
	defined(CONFIG_SMP_ON_UP)
#define __flush_icache_preferred	__cpuc_flush_icache_all
#elif __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
#define __flush_icache_preferred	__flush_icache_all_v7_smp
#elif __LINUX_ARM_ARCH__ == 6 && defined(CONFIG_ARM_ERRATA_411920)
#define __flush_icache_preferred	__cpuc_flush_icache_all
#else
#define __flush_icache_preferred	__flush_icache_all_generic
#endif

/*
 * Invalidate the whole I-cache using the implementation selected by
 * __flush_icache_preferred, then issue a dsb() barrier.
 */
static inline void __flush_icache_all(void)
{
	__flush_icache_preferred();
	/* barrier after the maintenance operation; keep this order */
	dsb();
}

/*
 * Flush caches up to Level of Unification Inner Shareable
 */
#define flush_cache_louis()		__cpuc_flush_kern_louis()

/* Flush through the flush_kern_all entry point. */
#define flush_cache_all()		__cpuc_flush_kern_all()

/*
 * Flush all user-space cache entries, but only when the calling CPU is set
 * in the cpumask of @mm.
 */
static inline void vivt_flush_cache_mm(struct mm_struct *mm)
{
	if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm)))
		return;

	__cpuc_flush_user_all();
}

/*
 * Flush the user range [start, end) of @vma, widened to whole pages.
 * Nothing is done when the vma has an mm and the calling CPU is not set in
 * that mm's cpumask.
 */
static inline void
vivt_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
{
	struct mm_struct *owner = vma->vm_mm;

	if (owner && !cpumask_test_cpu(smp_processor_id(), mm_cpumask(owner)))
		return;

	__cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end),
				vma->vm_flags);
}

/*
 * Flush the single page of @vma that contains @user_addr.
 * Nothing is done when the vma has an mm and the calling CPU is not set in
 * that mm's cpumask.  @pfn is not used here.
 */
static inline void
vivt_flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn)
{
	struct mm_struct *owner = vma->vm_mm;
	unsigned long page_start;

	if (owner && !cpumask_test_cpu(smp_processor_id(), mm_cpumask(owner)))
		return;

	page_start = user_addr & PAGE_MASK;
	__cpuc_flush_user_range(page_start, page_start + PAGE_SIZE,
				vma->vm_flags);
}

/*
 * Without CONFIG_CPU_CACHE_VIPT the generic flush_cache_* hooks map to the
 * vivt_* inline helpers; otherwise they are out-of-line functions.
 * Each macro body must directly follow its backslash continuation.
 */
#ifndef CONFIG_CPU_CACHE_VIPT
#define flush_cache_mm(mm) \
		vivt_flush_cache_mm(mm)
#define flush_cache_range(vma,start,end) \
		vivt_flush_cache_range(vma,start,end)
#define flush_cache_page(vma,addr,pfn) \
		vivt_flush_cache_page(vma,addr,pfn)
#else
extern void flush_cache_mm(struct mm_struct *mm);
extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn);
#endif

/* Duplicating an mm needs the same flush as flush_cache_mm(). */
#define flush_cache_dup_mm(mm) flush_cache_mm(mm)

/*
 * flush_cache_user_range is used when we want to ensure that the
 * Harvard caches are synchronised for the user space address range.
 * This is used for the ARM private sys_cacheflush system call.
 */
#define flush_cache_user_range(s, e)	__cpuc_coherent_user_range(s, e)

/*
 * Perform necessary cache operations to ensure that data previously
 * stored within this range of addresses can be executed by the CPU.
 */
#define flush_icache_range(s, e)	__cpuc_coherent_kern_range(s, e)

/*
 * Perform necessary cache operations to ensure that the TLB will
 * see data written in the specified area.
 */
#define clean_dcache_area(start, size)	cpu_dcache_clean_area(start, size)

/*
 * flush_dcache_page is used when the kernel has written to the page
 * cache page at virtual address page->virtual.
 *
 * If this page isn't mapped (ie, page_mapping == NULL), or it might
 * have userspace mappings, then we _must_ always clean + invalidate
 * the dcache entries associated with the kernel mapping.
 *
 * Otherwise we can defer the operation, and clean the cache when we are
 * about to change to user space.  This is the same method as used on SPARC64.
 * See update_mmu_cache for the user space part.
 */
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
extern void flush_dcache_page(struct page *);

/*
 * Flush @size bytes at @addr through __cpuc_flush_dcache_area(), but only
 * on VIVT or aliasing VIPT caches; other cache types need nothing here.
 */
static inline void flush_kernel_vmap_range(void *addr, int size)
{
	if (!cache_is_vivt() && !cache_is_vipt_aliasing())
		return;

	__cpuc_flush_dcache_area(addr, (size_t)size);
}

/*
 * Counterpart of flush_kernel_vmap_range().  Note that it also goes through
 * __cpuc_flush_dcache_area() rather than an invalidate-only operation, and
 * only on VIVT or aliasing VIPT caches.
 */
static inline void invalidate_kernel_vmap_range(void *addr, int size)
{
	if (!cache_is_vivt() && !cache_is_vipt_aliasing())
		return;

	__cpuc_flush_dcache_area(addr, (size_t)size);
}

#define ARCH_HAS_FLUSH_ANON_PAGE
/*
 * Hand anonymous pages to __flush_anon_page(); any other page is left
 * untouched.
 */
static inline void flush_anon_page(struct vm_area_struct *vma,
			 struct page *page, unsigned long vmaddr)
{
	extern void __flush_anon_page(struct vm_area_struct *vma,
				struct page *, unsigned long);

	if (!PageAnon(page))
		return;

	__flush_anon_page(vma, page, vmaddr);
}

#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE

extern void flush_kernel_dcache_page(struct page *);

/*
 * Lock/unlock helpers that take the mapping's tree_lock with interrupts
 * disabled.  Each body must directly follow its backslash continuation.
 */
#define flush_dcache_mmap_lock(mapping) \
	spin_lock_irq(&(mapping)->tree_lock)
#define flush_dcache_mmap_unlock(mapping) \
	spin_unlock_irq(&(mapping)->tree_lock)

/* Only the page is used: the request is forwarded to flush_dcache_page(). */
#define flush_icache_user_range(vma,page,addr,len) \
	flush_dcache_page(page)

/*
 * We don't appear to need to do anything here.  In fact, if we did, we'd
 * duplicate cache flushing elsewhere performed by flush_dcache_page().
 *
 * The macro therefore expands to an empty statement.
 */
#define flush_icache_page(vma,page)do { } while (0)

/*
 * flush_cache_vmap() is used when creating mappings (eg, via vmap,
 * vmalloc, ioremap etc) in kernel space for pages.  On non-VIPT
 * caches, since the direct-mappings of these pages may contain cached
 * data, we need to do a full cache flush to ensure that writebacks
 * don't corrupt data placed into these pages via the new mappings.
 *
 * @start and @end are not used by this implementation.
 */
static inline void flush_cache_vmap(unsigned long start, unsigned long end)
{
	if (cache_is_vipt_nonaliasing()) {
		/*
		 * set_pte_at() called from vmap_pte_range() does not
		 * have a DSB after cleaning the cache line.
		 */
		dsb(ishst);
		return;
	}

	flush_cache_all();
}

/*
 * Tear-down counterpart of flush_cache_vmap(): a full cache flush unless
 * the cache is non-aliasing VIPT.  @start and @end are not used.
 */
static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
{
	if (cache_is_vipt_nonaliasing())
		return;

	flush_cache_all();
}

/*

* Memory synchronization helpers for mixed cached vs non cached accesses.

*

* Some synchronization algorithms have to set states in memory with the

* cache enabled or disabled depending on the code path. It is crucial

* to always ensure proper cache maintenance to update main memory right

* away in that case.

*

* Any cached write must be followed by a cache clean operation.

* Any cached read must be preceded by a cache invalidate operation.

* Yet, in the read case, a cache flush i.e. atomic clean+invalidate

* operation is needed to avoid discarding possible concurrent writes to the

* accessed memory.

*

* Also, in order to prevent a cached writer from interfering with an

* adjacent non-cached writer, each state variable must be located to

* a separate cache line.

*/

/*
 * Alignment used to give each state variable its own cache line.
 *
 * The granule must be >= the largest cache writeback size of every
 * platform included in the current kernel configuration.  It is expressed
 * as a power of two: granule = 1 << order.
 */
#define __CACHE_WRITEBACK_ORDER		6	/* guessed from existing platforms */
#define __CACHE_WRITEBACK_GRANULE	(1 << __CACHE_WRITEBACK_ORDER)

/*
 * There is no __cpuc_clean_dcache_area but we use it anyway for
 * code intent clarity, and alias it to __cpuc_flush_dcache_area.
 *
 * Callers asking for a "clean" therefore get whatever
 * __cpuc_flush_dcache_area does.
 */
#define __cpuc_clean_dcache_area __cpuc_flush_dcache_area

/*
 * Ensure preceding writes to *p by this CPU are visible to
 * subsequent reads by other CPUs:
 *
 * the inner cache is cleaned first, then the outer cache over the matching
 * physical range.
 */
static inline void __sync_cache_range_w(volatile void *p, size_t size)
{
	char *base = (char *)p;

	__cpuc_clean_dcache_area(base, size);
	outer_clean_range(__pa(base), __pa(base + size));
}

/*
 * Ensure preceding writes to *p by other CPUs are visible to
 * subsequent reads by this CPU.  We must be careful not to
 * discard data simultaneously written by another CPU, hence the
 * usage of flush rather than invalidate operations.
 */
static inline void __sync_cache_range_r(volatile void *p, size_t size)
{
	char *base = (char *)p;

#ifdef CONFIG_OUTER_CACHE
	if (outer_cache.flush_range) {
		/*
		 * Dirty data that migrated from other CPUs into our cache
		 * has to be cleaned out before the outer cache is handled.
		 */
		__cpuc_clean_dcache_area(base, size);

		/* Clean and invalidate stale data for *p from outer ... */
		outer_flush_range(__pa(base), __pa(base + size));
	}
#endif

	/* ... and inner cache: */
	__cpuc_flush_dcache_area(base, size);
}

/* Typed wrappers: synchronise exactly the object that ptr points to. */
#define sync_cache_w(ptr)	__sync_cache_range_w(ptr, sizeof(*(ptr)))
#define sync_cache_r(ptr)	__sync_cache_range_r(ptr, sizeof(*(ptr)))

/*

* Disabling cache access for one CPU in an ARMv7 SMP system is tricky.

* To do so we must:

*

* - Clear the SCTLR.C bit to prevent further cache allocations

* - Flush the desired level of cache

* - Clear the ACTLR "SMP" bit to disable local coherency

*

* ... and so without any intervening memory access in between those steps,

* not even to the stack.

*

* WARNING -- After this has been called:

*

* - No ldrex/strex (and similar) instructions must be used.

* - The CPU is obviously no longer coherent with the other CPUs.

* - This is unlikely to work as expected if Linux is running non-secure.

*

* Note:

*

* - This is known to apply to several ARMv7 processor implementations,

* however some exceptions may exist. Caveat emptor.

*

* - The clobber list is dictated by the call to v7_flush_dcache_*.

* fp is preserved to the stack explicitly prior disabling the cache

* since adding it to the clobber list is incompatible with having

* CONFIG_FRAME_POINTER=y. ip is saved as well if ever r12-clobbering

* trampoline are inserted by the linker and to keep sp 64-bit aligned.

*/

/*
 * v7_exit_coherency_flush(level) - disable the D-cache, flush it with
 * v7_flush_dcache_<level>, then clear ACTLR bit 6 to leave local coherency.
 *
 * Every mnemonic is separated from its operands by a tab and every line of
 * the macro directly follows the previous backslash; without that the asm
 * strings do not assemble and the macro body is cut off after its first line.
 */
#define v7_exit_coherency_flush(level) \
	asm volatile( \
	"stmfd	sp!, {fp, ip} \n\t" \
	"mrc	p15, 0, r0, c1, c0, 0	@ get SCTLR \n\t" \
	"bic	r0, r0, #"__stringify(CR_C)" \n\t" \
	"mcr	p15, 0, r0, c1, c0, 0	@ set SCTLR \n\t" \
	"isb	\n\t" \
	"bl	v7_flush_dcache_"__stringify(level)" \n\t" \
	"clrex	\n\t" \
	"mrc	p15, 0, r0, c1, c0, 1	@ get ACTLR \n\t" \
	"bic	r0, r0, #(1 << 6)	@ disable local coherency \n\t" \
	"mcr	p15, 0, r0, c1, c0, 1	@ set ACTLR \n\t" \
	"isb	\n\t" \
	"dsb	\n\t" \
	"ldmfd	sp!, {fp, ip}" \
	: : : "r0","r1","r2","r3","r4","r5","r6","r7", \
	      "r9","r10","lr","memory" )

/*
 * Page attribute helpers, declared here and defined elsewhere.
 * NOTE(review): presumably each one changes the protection of numpages
 * pages starting at addr and returns 0 or a negative error code - confirm
 * against the implementation.
 */
int set_memory_ro(unsigned long addr, int numpages);
int set_memory_rw(unsigned long addr, int numpages);
int set_memory_x(unsigned long addr, int numpages);
int set_memory_nx(unsigned long addr, int numpages);

#endif

/*

* linux/arch/arm/mm/cache-v7.S

*

* Copyright (C) 2001 Deep Blue Solutions Ltd.

* Copyright (C) 2005 ARM Ltd.

*

* This program is free software; you can redistribute it and/or modify

* it under the terms of the GNU General Public License version 2 as

* published by the Free Software Foundation.

*

* This is the "shell" of the ARMv7 processor support.

*/

#include

#include

#include

#include

#include

#include "proc-macros.S"

/*

* The secondary kernel init calls v7_flush_dcache_all before it enables

* the L1; however, the L1 comes out of reset in an undefined state, so

* the clean + invalidate performed by v7_flush_dcache_all causes a bunch

* of cache lines with uninitialized data and uninitialized tags to get

* written out to memory, which does really unpleasant things to the main

* processor. We fix this by performing an invalidate, rather than a

* clean + invalidate, before jumping into the kernel.

*

* This function is cloned from arch/arm/mach-tegra/headsmp.S, and needs

* to be called for both secondary cores startup and primary core resume

* procedures.

*/

ENTRY(v7_invalidate_l1)
	mov r0, #0
	mcr p15, 2, r0, c0, c0, 0 @ select cache level 0 in cssr
	mrc p15, 1, r0, c0, c0, 0 @ read the csidr of the selected cache

	ldr r1, =0x7fff
	and r2, r1, r0, lsr #13 @ NumSets - 1
	ldr r1, =0x3ff
	and r3, r1, r0, lsr #3 @ NumWays - 1
	add r2, r2, #1 @ NumSets

	and r0, r0, #0x7
	add r0, r0, #4 @ SetShift

	clz r1, r3 @ WayShift
	add r4, r3, #1 @ NumWays
1: sub r2, r2, #1 @ NumSets--
	mov r3, r4 @ Temp = NumWays
2: subs r3, r3, #1 @ Temp--
	mov r5, r3, lsl r1
	mov r6, r2, lsl r0
	orr r5, r5, r6 @ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
	mcr p15, 0, r5, c7, c6, 2 @ invalidate this set/way (no clean)
	bgt 2b @ flags still come from the subs above
	cmp r2, #0
	bgt 1b
	dsb
	isb
	mov pc, lr
ENDPROC(v7_invalidate_l1)

/*
 *	v7_flush_icache_all()
 *
 *	Flush the whole I-cache.
 *
 *	Registers:
 *	r0 - set to 0
 *
 *	Mnemonics and operands are tab separated; the fused form
 *	("movr0", "mcrp15") does not assemble.
 */
ENTRY(v7_flush_icache_all)
	mov	r0, #0
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)		@ invalidate I-cache inner shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)		@ I+BTB cache invalidate
	mov	pc, lr
ENDPROC(v7_flush_icache_all)

/*
 *     v7_flush_dcache_louis()
 *
 *     Flush the D-cache up to the Level of Unification Inner Shareable
 *
 *     Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
 *
 *     The level count is taken from clidr (LoUIS on SMP, LoUU on UP) and
 *     the work is done by the flush_levels loop of v7_flush_dcache_all,
 *     which is entered with r0 = clidr, r3 = level * 2 and r10 = 0.
 */
ENTRY(v7_flush_dcache_louis)
	dmb					@ ensure ordering with previous memory accesses
	mrc	p15, 1, r0, c0, c0, 1		@ read clidr, r0 = clidr
	ALT_SMP(ands	r3, r0, #(7 << 21))	@ extract LoUIS from clidr
	ALT_UP(ands	r3, r0, #(7 << 27))	@ extract LoUU from clidr
#ifdef CONFIG_ARM_ERRATA_643719
	ALT_SMP(mrceq	p15, 0, r2, c0, c0, 0)	@ read main ID register
	ALT_UP(moveq	pc, lr)			@ LoUU is zero, so nothing to do
	ldreq	r1, =0x410fc090			@ ID of ARM Cortex A9 r0p?
	biceq	r2, r2, #0x0000000f		@ clear minor revision number
	teqeq	r2, r1				@ test for errata affected core and if so...
	orreqs	r3, #(1 << 21)			@   fix LoUIS value (and set flags state to 'ne')
#endif
	ALT_SMP(mov	r3, r3, lsr #20)	@ r3 = LoUIS * 2
	ALT_UP(mov	r3, r3, lsr #26)	@ r3 = LoUU * 2
	moveq	pc, lr				@ return if level == 0
	mov	r10, #0				@ r10 (starting level) = 0
	b	flush_levels			@ start flushing cache levels
ENDPROC(v7_flush_dcache_louis)

/*
 *	v7_flush_dcache_all()
 *
 *	Flush the whole D-cache.
 *
 *	Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
 *
 *	Takes no arguments.  Every cache level reported by clidr up to the
 *	level of coherency is cleaned and invalidated by set/way.
 *
 *	flush_levels is also entered from v7_flush_dcache_louis with
 *	r0 = clidr, r3 = (number of levels) * 2 and r10 = 0.
 */
ENTRY(v7_flush_dcache_all)
	dmb					@ ensure ordering with previous memory accesses
	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
	ands	r3, r0, #0x7000000		@ extract loc from clidr
	mov	r3, r3, lsr #23			@ left align loc bit field
	beq	finished			@ if loc is 0, then no need to clean
	mov	r10, #0				@ start clean at cache level 0
flush_levels:
	add	r2, r10, r10, lsr #1		@ work out 3x current cache level
	mov	r1, r0, lsr r2			@ extract cache type bits from clidr
	and	r1, r1, #7			@ mask of the bits for current cache only
	cmp	r1, #2				@ see what cache we have at this level
	blt	skip				@ skip if no cache, or just i-cache
#ifdef CONFIG_PREEMPT
	save_and_disable_irqs_notrace r9	@ make cssr&csidr read atomic
#endif
	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
	isb					@ isb to sync the new cssr&csidr
	mrc	p15, 1, r1, c0, c0, 0		@ read the new csidr
#ifdef CONFIG_PREEMPT
	restore_irqs_notrace r9
#endif
	and	r2, r1, #7			@ extract the length of the cache lines
	add	r2, r2, #4			@ add 4 (line length offset)
	ldr	r4, =0x3ff
	ands	r4, r4, r1, lsr #3		@ find maximum number on the way size
	clz	r5, r4				@ find bit position of way size increment
	ldr	r7, =0x7fff
	ands	r7, r7, r1, lsr #13		@ extract max number of the index size
loop1:
	mov	r9, r7				@ create working copy of max index
loop2:
 ARM(	orr	r11, r10, r4, lsl r5	)	@ factor way and cache number into r11
 THUMB(	lsl	r6, r4, r5		)
 THUMB(	orr	r11, r10, r6		)	@ factor way and cache number into r11
 ARM(	orr	r11, r11, r9, lsl r2	)	@ factor index number into r11
 THUMB(	lsl	r6, r9, r2		)
 THUMB(	orr	r11, r11, r6		)	@ factor index number into r11
	mcr	p15, 0, r11, c7, c14, 2		@ clean & invalidate by set/way
	subs	r9, r9, #1			@ decrement the index
	bge	loop2
	subs	r4, r4, #1			@ decrement the way
	bge	loop1
skip:
	add	r10, r10, #2			@ increment cache number
	cmp	r3, r10
	bgt	flush_levels
finished:
	mov	r10, #0				@ switch back to cache level 0
	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
	dsb
	isb
	mov	pc, lr
ENDPROC(v7_flush_dcache_all)

/*
 *	v7_flush_kern_cache_all()
 *
 *	Flush the entire cache system.
 *	The data cache flush is now achieved using atomic clean / invalidates
 *	working outwards from L1 cache. This is done using Set/Way based cache
 *	maintenance instructions.
 *	The instruction cache can still be invalidated back to the point of
 *	unification in a single instruction.
 *
 *	The registers corrupted by v7_flush_dcache_all are saved on the stack
 *	around the call.
 */
ENTRY(v7_flush_kern_cache_all)
 ARM(	stmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
 THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)
	bl	v7_flush_dcache_all
	mov	r0, #0
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)	@ invalidate I-cache inner shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
 ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
 THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
	mov	pc, lr
ENDPROC(v7_flush_kern_cache_all)

/*
 *     v7_flush_kern_cache_louis(void)
 *
 *     Flush the data cache up to Level of Unification Inner Shareable.
 *     Invalidate the I-cache to the point of unification.
 *
 *     The registers corrupted by v7_flush_dcache_louis are saved on the
 *     stack around the call.
 */
ENTRY(v7_flush_kern_cache_louis)
 ARM(	stmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
 THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)
	bl	v7_flush_dcache_louis
	mov	r0, #0
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)	@ invalidate I-cache inner shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
 ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
 THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
	mov	pc, lr
ENDPROC(v7_flush_kern_cache_louis)

/*
 *	v7_flush_user_cache_all()
 *
 *	Flush all cache entries in a particular address space
 *
 *	- mm    - mm_struct describing address space
 *
 *	Falls through into v7_flush_user_cache_range below.
 */
ENTRY(v7_flush_user_cache_all)
	/*FALLTHROUGH*/

/*
 *	v7_flush_user_cache_range(start, end, flags)
 *
 *	Flush a range of cache entries in the specified address space.
 *
 *	- start - start address (may not be aligned)
 *	- end   - end address (exclusive, may not be aligned)
 *	- flags	- vm_area_struct flags describing address space
 *
 *	It is assumed that:
 *	- we have a VIPT cache.
 *
 *	Both entry points simply return: no maintenance is performed here.
 */
ENTRY(v7_flush_user_cache_range)
	mov	pc, lr
ENDPROC(v7_flush_user_cache_all)
ENDPROC(v7_flush_user_cache_range)

/*
 *	v7_coherent_kern_range(start,end)
 *
 *	Ensure that the I and D caches are coherent within specified
 *	region.  This is typically used when code has been written to
 *	a memory region, and will be executed.
 *
 *	- start   - virtual start address of region
 *	- end     - virtual end address of region
 *
 *	It is assumed that:
 *	- the Icache does not read data from the write buffer
 */
ENTRY(v7_coherent_kern_range)
	/* FALLTHROUGH */

/*
 *	v7_coherent_user_range(start,end)
 *
 *	Ensure that the I and D caches are coherent within specified
 *	region.  This is typically used when code has been written to
 *	a memory region, and will be executed.
 *
 *	- start   - virtual start address of region (r0)
 *	- end     - virtual end address of region (r1)
 *
 *	Returns 0 in r0 on success, -EFAULT if a cache operation faults.
 *
 *	It is assumed that:
 *	- the Icache does not read data from the write buffer
 */
ENTRY(v7_coherent_user_range)
 UNWIND(.fnstart		)
	dcache_line_size r2, r3
	sub	r3, r2, #1
	bic	r12, r0, r3			@ round start down to a D line
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
 USER(	mcr	p15, 0, r12, c7, c11, 1	)	@ clean D line to the point of unification
	add	r12, r12, r2
	cmp	r12, r1
	blo	1b
	dsb	ishst
	icache_line_size r2, r3
	sub	r3, r2, #1
	bic	r12, r0, r3			@ round start down to an I line
2:
 USER(	mcr	p15, 0, r12, c7, c5, 1	)	@ invalidate I line
	add	r12, r12, r2
	cmp	r12, r1
	blo	2b
	mov	r0, #0
	ALT_SMP(mcr	p15, 0, r0, c7, c1, 6)	@ invalidate BTB Inner Shareable
	ALT_UP(mcr	p15, 0, r0, c7, c5, 6)	@ invalidate BTB
	dsb	ishst
	isb
	mov	pc, lr

/*
 * Fault handling for the cache operation above. If the virtual address in r0
 * isn't mapped, fail with -EFAULT.
 */
9001:
#ifdef CONFIG_ARM_ERRATA_775420
	dsb
#endif
	mov	r0, #-EFAULT
	mov	pc, lr
 UNWIND(.fnend		)
ENDPROC(v7_coherent_kern_range)
ENDPROC(v7_coherent_user_range)

/*
 *	v7_flush_kern_dcache_area(void *addr, size_t size)
 *
 *	Ensure that the data held in the page kaddr is written back
 *	to the page in question.
 *
 *	- addr	- kernel address (r0)
 *	- size	- region size (r1)
 *
 *	Every D line overlapping [addr, addr + size) is cleaned and
 *	invalidated.
 */
ENTRY(v7_flush_kern_dcache_area)
	dcache_line_size r2, r3
	add	r1, r0, r1			@ r1 = end address
	sub	r3, r2, #1
	bic	r0, r0, r3			@ round start down to a line
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D line / unified line
	add	r0, r0, r2
	cmp	r0, r1
	blo	1b
	dsb
	mov	pc, lr
ENDPROC(v7_flush_kern_dcache_area)

/*
 *	v7_dma_inv_range(start,end)
 *
 *	Invalidate the data cache within the specified region; we will
 *	be performing a DMA operation in this region and we want to
 *	purge old data in the cache.
 *
 *	- start   - virtual start address of region (r0)
 *	- end     - virtual end address of region (r1)
 *
 *	A start or end address that is not line aligned has its line cleaned
 *	and invalidated first, so the part of that line outside the region is
 *	written back rather than discarded.
 */
v7_dma_inv_range:
	dcache_line_size r2, r3
	sub	r3, r2, #1
	tst	r0, r3				@ start not line aligned?
	bic	r0, r0, r3
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
	mcrne	p15, 0, r0, c7, c14, 1		@ clean & invalidate D / U line

	tst	r1, r3				@ end not line aligned?
	bic	r1, r1, r3
	mcrne	p15, 0, r1, c7, c14, 1		@ clean & invalidate D / U line
1:
	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D / U line
	add	r0, r0, r2
	cmp	r0, r1
	blo	1b
	dsb
	mov	pc, lr
ENDPROC(v7_dma_inv_range)

/*
 *	v7_dma_clean_range(start,end)
 *	- start   - virtual start address of region (r0)
 *	- end     - virtual end address of region (r1)
 *
 *	Clean every D line overlapping [start, end).
 */
v7_dma_clean_range:
	dcache_line_size r2, r3
	sub	r3, r2, #1
	bic	r0, r0, r3			@ round start down to a line
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
	mcr	p15, 0, r0, c7, c10, 1		@ clean D / U line
	add	r0, r0, r2
	cmp	r0, r1
	blo	1b
	dsb
	mov	pc, lr
ENDPROC(v7_dma_clean_range)

/*
 *	v7_dma_flush_range(start,end)
 *	- start   - virtual start address of region (r0)
 *	- end     - virtual end address of region (r1)
 *
 *	Clean and invalidate every D line overlapping [start, end).
 */
ENTRY(v7_dma_flush_range)
	dcache_line_size r2, r3
	sub	r3, r2, #1
	bic	r0, r0, r3			@ round start down to a line
#ifdef CONFIG_ARM_ERRATA_764369
	ALT_SMP(W(dsb))
	ALT_UP(W(nop))
#endif
1:
	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D / U line
	add	r0, r0, r2
	cmp	r0, r1
	blo	1b
	dsb
	mov	pc, lr
ENDPROC(v7_dma_flush_range)

/*
 *	dma_map_area(start, size, dir)
 *	- start	- kernel virtual start address (r0)
 *	- size	- size of region (r1)
 *	- dir	- DMA direction (r2)
 *
 *	DMA_FROM_DEVICE invalidates the range; any other direction cleans it.
 */
ENTRY(v7_dma_map_area)
	add	r1, r1, r0			@ r1 = end address
	teq	r2, #DMA_FROM_DEVICE
	beq	v7_dma_inv_range
	b	v7_dma_clean_range
ENDPROC(v7_dma_map_area)

/*
 *	dma_unmap_area(start, size, dir)
 *	- start	- kernel virtual start address (r0)
 *	- size	- size of region (r1)
 *	- dir	- DMA direction (r2)
 *
 *	Every direction except DMA_TO_DEVICE invalidates the range, so the
 *	CPU re-reads what the device wrote; DMA_TO_DEVICE needs nothing.
 */
ENTRY(v7_dma_unmap_area)
	add	r1, r1, r0			@ r1 = end address
	teq	r2, #DMA_TO_DEVICE
	bne	v7_dma_inv_range
	mov	pc, lr
ENDPROC(v7_dma_unmap_area)

__INITDATA

@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
define_cache_functions v7

/*

* We need constants.h for:

* VMA_VM_MM

* VMA_VM_FLAGS

* VM_EXEC

*/

#include

#include

/*
 * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
 *   rd - destination register, rn - register holding the vma pointer
 */
	.macro	vma_vm_mm, rd, rn
	ldr	\rd, [\rn, #VMA_VM_MM]
	.endm

/*
 * vma_vm_flags - get vma->vm_flags
 *   rd - destination register, rn - register holding the vma pointer
 */
	.macro	vma_vm_flags, rd, rn
	ldr	\rd, [\rn, #VMA_VM_FLAGS]
	.endm

/*
 * tsk_mm - load the word at TI_TASK from rn, then the word at
 * TSK_ACTIVE_MM from that pointer, into rd.
 */
	.macro	tsk_mm, rd, rn
	ldr	\rd, [\rn, #TI_TASK]
	ldr	\rd, [\rd, #TSK_ACTIVE_MM]
	.endm

/*
 * act_mm - get current->active_mm
 *
 * sp is rounded down to an 8 KiB boundary (8128 + 63 = 8191 bits cleared),
 * then TI_TASK and TSK_ACTIVE_MM are followed from there.
 */
	.macro	act_mm, rd
	bic	\rd, sp, #8128
	bic	\rd, \rd, #63
	ldr	\rd, [\rd, #TI_TASK]
	ldr	\rd, [\rd, #TSK_ACTIVE_MM]
	.endm

/*
 * mmid - get context id from mm pointer (mm->context.id)
 * note, this field is 64bit, so in big-endian the two words are swapped too.
 */
	.macro	mmid, rd, rn
#ifdef __ARMEB__
	ldr	\rd, [\rn, #MM_CONTEXT_ID + 4 ]
#else
	ldr	\rd, [\rn, #MM_CONTEXT_ID]
#endif
	.endm

/*
 * asid - mask the ASID from the context ID (keeps the low 8 bits of rn)
 */
	.macro	asid, rd, rn
	and	\rd, \rn, #255
	.endm

/*
 * crval - emit two words: the clear mask followed by the set mask.
 * The set mask is mmuset with CONFIG_MMU and ucset without it.
 */
	.macro	crval, clear, mmuset, ucset
#ifdef CONFIG_MMU
	.word	\clear
	.word	\mmuset
#else
	.word	\clear
	.word	\ucset
#endif
	.endm

/*
 * dcache_line_size - get the minimum D-cache line size from the CTR register
 * on ARMv7.
 *
 *   reg - receives the line size in bytes: 4 << CTR[19:16]
 *   tmp - scratch register
 */
	.macro	dcache_line_size, reg, tmp
	mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
	lsr	\tmp, \tmp, #16
	and	\tmp, \tmp, #0xf		@ cache line size encoding
	mov	\reg, #4			@ bytes per word
	mov	\reg, \reg, lsl \tmp		@ actual cache line size
	.endm

/*
 * icache_line_size - get the minimum I-cache line size from the CTR register
 * on ARMv7.
 *
 *   reg - receives the line size in bytes: 4 << CTR[3:0]
 *   tmp - scratch register
 */
	.macro	icache_line_size, reg, tmp
	mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
	and	\tmp, \tmp, #0xf		@ cache line size encoding
	mov	\reg, #4			@ bytes per word
	mov	\reg, \reg, lsl \tmp		@ actual cache line size
	.endm

/*
 * Sanity check the PTE configuration for the code below - which makes
 * certain assumptions about how these bits are laid out.
 *
 * The #if expression spans several lines; each continuation must directly
 * follow the backslash or the expression is cut short.
 */
#ifdef CONFIG_MMU
#if L_PTE_SHARED != PTE_EXT_SHARED
#error PTE shared bit mismatch
#endif
#if !defined (CONFIG_ARM_LPAE) && \
	(L_PTE_XN+L_PTE_USER+L_PTE_RDONLY+L_PTE_DIRTY+L_PTE_YOUNG+\
	 L_PTE_FILE+L_PTE_PRESENT) > L_PTE_SHARED
#error Invalid Linux PTE bit settings
#endif
#endif	/* CONFIG_MMU */

/*
 * The ARMv6 and ARMv7 set_pte_ext translation function.
 *
 * Permission translation:
 *  YUWD  APX AP1 AP0	SVC	User
 *  0xxx   0   0   0	no acc	no acc
 *  100x   1   0   1	r/o	no acc
 *  10x0   1   0   1	r/o	no acc
 *  1011   0   0   1	r/w	no acc
 *  110x   1   1   1	r/o	r/o
 *  11x0   1   1   1	r/o	r/o
 *  1111   0   1   1	r/w	r/w
 */

/*
 * armv6_mt_table - emit the <pfx>_mt_table memory type table: sixteen
 * words, one per Linux memory type named in the trailing comments.
 * armv6_set_pte_ext reads it using the L_PTE_MT_MASK bits of the PTE as
 * a byte offset.
 */
	.macro	armv6_mt_table pfx
\pfx\()_mt_table:
	.long	0x00						@ L_PTE_MT_UNCACHED
	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_BUFFERABLE
	.long	PTE_CACHEABLE					@ L_PTE_MT_WRITETHROUGH
	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_WRITEBACK
	.long	PTE_BUFFERABLE					@ L_PTE_MT_DEV_SHARED
	.long	0x00						@ unused
	.long	0x00						@ L_PTE_MT_MINICACHE (not present)
	.long	PTE_EXT_TEX(1) | PTE_CACHEABLE | PTE_BUFFERABLE	@ L_PTE_MT_WRITEALLOC
	.long	0x00						@ unused
	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_DEV_WC
	.long	0x00						@ unused
	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_DEV_CACHED
	.long	PTE_EXT_TEX(2)					@ L_PTE_MT_DEV_NONSHARED
	.long	0x00						@ unused
	.long	0x00						@ unused
	.long	PTE_CACHEABLE | PTE_BUFFERABLE | PTE_EXT_APX	@ L_PTE_MT_VECTORS
	.endm

/*
 * armv6_set_pte_ext - store a Linux PTE and the matching hardware PTE.
 *
 *   r0 - address of the Linux PTE slot (the hardware slot is 2048 bytes on)
 *   r1 - Linux PTE value
 *   r2 - extra bits ORed into the hardware PTE
 *   r3, ip - scratch
 *
 * The hardware entry is written as 0 (fault) unless the Linux PTE is both
 * young and present, and also when L_PTE_NONE is set.
 */
	.macro	armv6_set_pte_ext pfx
	str	r1, [r0], #2048			@ linux version

	bic	r3, r1, #0x000003fc
	bic	r3, r3, #PTE_TYPE_MASK
	orr	r3, r3, r2
	orr	r3, r3, #PTE_EXT_AP0 | 2

	adr	ip, \pfx\()_mt_table
	and	r2, r1, #L_PTE_MT_MASK
	ldr	r2, [ip, r2]			@ memory type bits from the table

	eor	r1, r1, #L_PTE_DIRTY
	tst	r1, #L_PTE_DIRTY|L_PTE_RDONLY
	orrne	r3, r3, #PTE_EXT_APX

	tst	r1, #L_PTE_USER
	orrne	r3, r3, #PTE_EXT_AP1
	tstne	r3, #PTE_EXT_APX

	@ user read-only -> kernel read-only
	bicne	r3, r3, #PTE_EXT_AP0

	tst	r1, #L_PTE_XN
	orrne	r3, r3, #PTE_EXT_XN

	eor	r3, r3, r2

	tst	r1, #L_PTE_YOUNG
	tstne	r1, #L_PTE_PRESENT
	moveq	r3, #0
	tstne	r1, #L_PTE_NONE
	movne	r3, #0

	str	r3, [r0]
	mcr	p15, 0, r0, c7, c10, 1		@ flush_pte
	.endm

/*
 * The ARMv3, ARMv4 and ARMv5 set_pte_ext translation function,
 * covering most CPUs except Xscale and Xscale 3.
 *
 * Permission translation:
 *  YUWD   AP	SVC	User
 *  0xxx  0x00	no acc	no acc
 *  100x  0x00	r/o	no acc
 *  10x0  0x00	r/o	no acc
 *  1011  0x55	r/w	no acc
 *  110x  0xaa	r/w	r/o
 *  11x0  0xaa	r/w	r/o
 *  1111  0xff	r/w	r/w
 *
 *   r0 - address of the Linux PTE slot (the hardware slot is 2048 bytes on)
 *   r1 - Linux PTE value
 *   r2, r3 - scratch
 *   wc_disable - when non-zero and CONFIG_CPU_DCACHE_WRITETHROUGH is set,
 *                the bufferable bit is cleared on cacheable entries
 */
	.macro	armv3_set_pte_ext wc_disable=1
	str	r1, [r0], #2048			@ linux version

	eor	r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY

	bic	r2, r1, #PTE_SMALL_AP_MASK	@ keep C, B bits
	bic	r2, r2, #PTE_TYPE_MASK
	orr	r2, r2, #PTE_TYPE_SMALL

	tst	r3, #L_PTE_USER			@ user?
	orrne	r2, r2, #PTE_SMALL_AP_URO_SRW

	tst	r3, #L_PTE_RDONLY | L_PTE_DIRTY	@ write and dirty?
	orreq	r2, r2, #PTE_SMALL_AP_UNO_SRW

	tst	r3, #L_PTE_PRESENT | L_PTE_YOUNG	@ present and young?
	movne	r2, #0

	.if	\wc_disable
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
	tst	r2, #PTE_CACHEABLE
	bicne	r2, r2, #PTE_BUFFERABLE
#endif
	.endif
	str	r2, [r0]			@ hardware version
	.endm

/*
 * Xscale set_pte_ext translation, split into two halves to cope
 * with work-arounds.  r3 must be preserved by code between these
 * two macros.
 *
 * Permission translation:
 *  YUWD  AP	SVC	User
 *  0xxx  00	no acc	no acc
 *  100x  00	r/o	no acc
 *  10x0  00	r/o	no acc
 *  1011  01	r/w	no acc
 *  110x  10	r/w	r/o
 *  11x0  10	r/w	r/o
 *  1111  11	r/w	r/w
 *
 * Prologue: r0 = Linux PTE slot, r1 = Linux PTE value.  Leaves the
 * partially built hardware entry in r2 and the inverted state bits
 * in r3 for the epilogue.
 */
.macro	xscale_set_pte_ext_prologue
	str	r1, [r0]			@ Linux version (r0 not advanced here)

	@ Invert PRESENT/YOUNG/DIRTY so that set bits in r3 mean "missing".
	eor	r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY

	bic	r2, r1, #PTE_SMALL_AP_MASK	@ keep C, B bits
	orr	r2, r2, #PTE_TYPE_EXT		@ extended page

	tst	r3, #L_PTE_USER			@ user?
	orrne	r2, r2, #PTE_EXT_AP_URO_SRW	@ yes -> user r/o, system r/w

	tst	r3, #L_PTE_RDONLY | L_PTE_DIRTY	@ write and dirty?
	orreq	r2, r2, #PTE_EXT_AP_UNO_SRW	@ yes -> user n/a, system r/w
						@ combined with user -> user r/w
.endm

.macro	xscale_set_pte_ext_epilogue
	/*
	 * Second half of the Xscale set_pte_ext sequence.  Expects r0 (Linux
	 * PTE slot), r2 (hardware entry so far) and r3 (inverted state bits)
	 * as left by xscale_set_pte_ext_prologue.  r0 is advanced by 2048 to
	 * the hardware entry; ip is clobbered.
	 */
	tst	r3, #L_PTE_PRESENT | L_PTE_YOUNG	@ present and young?
	movne	r2, #0				@ no -> fault
	str	r2, [r0, #2048]!		@ hardware version (pre-index, writeback)

	mov	ip, #0
	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
.endm

.macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0
	/*
	 * Emit the <name>_processor_functions table: abort handlers first,
	 * then the cpu_<name>_* entry points in a fixed order.
	 *
	 *   nommu   - non-zero replaces the set_pte_ext slot with 0
	 *   suspend - non-zero emits the suspend size and, when
	 *             CONFIG_PM_SLEEP is set, the do_suspend/do_resume
	 *             pointers; otherwise the three slots are zero
	 *
	 * NOTE(review): cpu_v7_do_suspend/do_resume are assembled under
	 * CONFIG_ARM_CPU_SUSPEND, while this table keys on CONFIG_PM_SLEEP;
	 * confirm the two options always agree.
	 */
	.type	\name\()_processor_functions, #object
	.align 2
ENTRY(\name\()_processor_functions)
	.word	\dabort
	.word	\pabort
	.word	cpu_\name\()_proc_init
	.word	cpu_\name\()_proc_fin
	.word	cpu_\name\()_reset
	.word	cpu_\name\()_do_idle
	.word	cpu_\name\()_dcache_clean_area
	.word	cpu_\name\()_switch_mm

	.if \nommu
	.word	0
	.else
	.word	cpu_\name\()_set_pte_ext
	.endif

	.if \suspend
	.word	cpu_\name\()_suspend_size
#ifdef CONFIG_PM_SLEEP
	.word	cpu_\name\()_do_suspend
	.word	cpu_\name\()_do_resume
#else
	.word	0
	.word	0
#endif
	.else
	.word	0
	.word	0
	.word	0
	.endif

	.size	\name\()_processor_functions, . - \name\()_processor_functions
.endm

.macro define_cache_functions name:req
	/*
	 * Emit the <name>_cache_fns table: eleven pointers to the
	 * <name>_* cache maintenance routines, in the order listed below.
	 */
	.align 2
	.type	\name\()_cache_fns, #object
ENTRY(\name\()_cache_fns)
	.long	\name\()_flush_icache_all
	.long	\name\()_flush_kern_cache_all
	.long	\name\()_flush_kern_cache_louis
	.long	\name\()_flush_user_cache_all
	.long	\name\()_flush_user_cache_range
	.long	\name\()_coherent_kern_range
	.long	\name\()_coherent_user_range
	.long	\name\()_flush_kern_dcache_area
	.long	\name\()_dma_map_area
	.long	\name\()_dma_unmap_area
	.long	\name\()_dma_flush_range
	.size	\name\()_cache_fns, . - \name\()_cache_fns
.endm

.macro define_tlb_functions name:req, flags_up:req, flags_smp
	/*
	 * Emit the <name>_tlb_fns table: the user and kernel range-flush
	 * routines followed by one flags word.  When flags_smp is given the
	 * flags word is an ALT_SMP/ALT_UP pair; otherwise flags_up is used
	 * unconditionally.
	 */
	.type	\name\()_tlb_fns, #object
ENTRY(\name\()_tlb_fns)
	.long	\name\()_flush_user_tlb_range
	.long	\name\()_flush_kern_tlb_range
	.ifnb \flags_smp
		ALT_SMP(.long	\flags_smp )
		ALT_UP(.long	\flags_up )
	.else
		.long	\flags_up
	.endif
	.size	\name\()_tlb_fns, . - \name\()_tlb_fns
.endm

.macro globl_equ x, y
	/* Export symbol x as a global alias whose value is y. */
	.globl	\x
	.equ	\x, \y
.endm

/*

* linux/arch/arm/mm/proc-v7.S

*

* Copyright (C) 2001 Deep Blue Solutions Ltd.

*

* This program is free software; you can redistribute it and/or modify

* it under the terms of the GNU General Public License version 2 as

* published by the Free Software Foundation.

*

* This is the "shell" of the ARMv7 processor support.

*/

#include

#include

#include

#include

#include

#include

#include

#include "proc-macros.S"

#ifdef CONFIG_ARM_LPAE

#include "proc-v7-3level.S"

#else

#include "proc-v7-2level.S"

#endif

/* Processor init hook: nothing to do here, return straight to the caller. */
ENTRY(cpu_v7_proc_init)
	mov	pc, lr
ENDPROC(cpu_v7_proc_init)

/*
 * Processor shutdown hook: clear the I, C and A bits of the CP15 control
 * register (read-modify-write).  Clobbers r0.
 */
ENTRY(cpu_v7_proc_fin)
	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
	bic	r0, r0, #(1 << 12)		@ ...i............
	bic	r0, r0, #0x0006			@ .............ca.
	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
	mov	pc, lr
ENDPROC(cpu_v7_proc_fin)

/*
 *	cpu_v7_reset(loc)
 *
 *	Perform a soft reset of the system.  Put the CPU into the
 *	same state as it would be if it had been reset, and branch
 *	to what would be the reset vector.
 *
 *	- loc   - location to jump to for soft reset (passed in r0)
 *
 *	This code must be executed using a flat identity mapping with
 *	caches disabled.  It is placed in .idmap.text; r1 is clobbered.
 */
	.align	5
	.pushsection	.idmap.text, "ax"
ENTRY(cpu_v7_reset)
	mrc	p15, 0, r1, c1, c0, 0		@ ctrl register
	bic	r1, r1, #0x1			@ ...............m
 THUMB(	bic	r1, r1, #1 << 30 )		@ SCTLR.TE (Thumb exceptions)
	mcr	p15, 0, r1, c1, c0, 0		@ disable MMU
	isb
	bx	r0				@ jump to loc
ENDPROC(cpu_v7_reset)
	.popsection

/*
 *	cpu_v7_do_idle()
 *
 *	Idle the processor (eg, wait for interrupt).
 *
 *	IRQs are already disabled.
 */
ENTRY(cpu_v7_do_idle)
	dsb					@ WFI may enter a low-power mode
	wfi
	mov	pc, lr
ENDPROC(cpu_v7_do_idle)

/*
 *	cpu_v7_dcache_clean_area(addr, size)
 *
 *	Clean the D-cache lines covering [r0, r0 + r1).  On SMP kernels
 *	the ALT_SMP nop is kept and the routine returns at once; on UP
 *	the branch to 1f is patched in and the area is cleaned one cache
 *	line at a time.  Clobbers r0-r3 on the UP path.
 */
ENTRY(cpu_v7_dcache_clean_area)
	ALT_SMP(W(nop))				@ MP extensions imply L1 PTW
	ALT_UP_B(1f)
	mov	pc, lr
1:	dcache_line_size r2, r3			@ r2 = line size, r3 = scratch
2:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
	add	r0, r0, r2			@ next line
	subs	r1, r1, r2			@ bytes remaining
	bhi	2b
	dsb	ishst
	mov	pc, lr
ENDPROC(cpu_v7_dcache_clean_area)

/* CPU name string, referenced from the proc_info records. */
	string	cpu_v7_name, "ARMv7 Processor"
	.align

	/*
	 * Suspend/resume support: derived from arch/arm/mach-s5pv210/sleep.S
	 *
	 * The save area holds nine 32-bit words.
	 */
	.globl	cpu_v7_suspend_size
	.equ	cpu_v7_suspend_size, 4 * 9

#ifdef CONFIG_ARM_CPU_SUSPEND

/*
 *	cpu_v7_do_suspend(ptr)
 *
 *	Save the CP15 state needed across suspend into the buffer at r0.
 *	Nine words are written (r4-r5, then r5-r11), matching
 *	cpu_v7_suspend_size:
 *	  FCSE/PID, user r/o thread ID, [LPAE: TTB1 low word in r5],
 *	  domain ID, TTB1, control, aux control, coprocessor access
 *	  control, TTB control.
 *
 *	r11 carries the TTB control register, so it must be part of the
 *	saved register set: r4-r11 are callee-saved under the AAPCS and
 *	the caller may be holding a live value in r11.
 */
ENTRY(cpu_v7_do_suspend)
	stmfd	sp!, {r4 - r11, lr}
	mrc	p15, 0, r4, c13, c0, 0	@ FCSE/PID
	mrc	p15, 0, r5, c13, c0, 3	@ User r/o thread ID
	stmia	r0!, {r4 - r5}
#ifdef CONFIG_MMU
	mrc	p15, 0, r6, c3, c0, 0	@ Domain ID
#ifdef CONFIG_ARM_LPAE
	mrrc	p15, 1, r5, r7, c2	@ TTB 1
#else
	mrc	p15, 0, r7, c2, c0, 1	@ TTB 1
#endif
	mrc	p15, 0, r11, c2, c0, 2	@ TTB control register
#endif
	mrc	p15, 0, r8, c1, c0, 0	@ Control register
	mrc	p15, 0, r9, c1, c0, 1	@ Auxiliary control register
	mrc	p15, 0, r10, c1, c0, 2	@ Co-processor access control
	stmia	r0, {r5 - r11}
	ldmfd	sp!, {r4 - r11, pc}
ENDPROC(cpu_v7_do_suspend)

/*
 *	cpu_v7_do_resume
 *
 *	Reload the CP15 state from the nine-word buffer at r0, in the
 *	layout written by cpu_v7_do_suspend, then branch to
 *	cpu_resume_mmu with the saved control register value in r0.
 *
 *	r1 is written to TTB 0 (presumably the page table base supplied
 *	by the resume caller - confirm against the caller).  Does not
 *	return to its caller; r4-r11 and ip are overwritten.
 */
ENTRY(cpu_v7_do_resume)
	mov	ip, #0
	mcr	p15, 0, ip, c7, c5, 0	@ invalidate I cache
	mcr	p15, 0, ip, c13, c0, 1	@ set reserved context ID
	ldmia	r0!, {r4 - r5}
	mcr	p15, 0, r4, c13, c0, 0	@ FCSE/PID
	mcr	p15, 0, r5, c13, c0, 3	@ User r/o thread ID
	ldmia	r0, {r5 - r11}
#ifdef CONFIG_MMU
	mcr	p15, 0, ip, c8, c7, 0	@ invalidate TLBs
	mcr	p15, 0, r6, c3, c0, 0	@ Domain ID
#ifdef CONFIG_ARM_LPAE
	mcrr	p15, 0, r1, ip, c2	@ TTB 0
	mcrr	p15, 1, r5, r7, c2	@ TTB 1
#else
	ALT_SMP(orr	r1, r1, #TTB_FLAGS_SMP)
	ALT_UP(orr	r1, r1, #TTB_FLAGS_UP)
	mcr	p15, 0, r1, c2, c0, 0	@ TTB 0
	mcr	p15, 0, r7, c2, c0, 1	@ TTB 1
#endif
	mcr	p15, 0, r11, c2, c0, 2	@ TTB control register
	ldr	r4, =PRRR		@ PRRR
	ldr	r5, =NMRR		@ NMRR
	mcr	p15, 0, r4, c10, c2, 0	@ write PRRR
	mcr	p15, 0, r5, c10, c2, 1	@ write NMRR
#endif	/* CONFIG_MMU */
	mrc	p15, 0, r4, c1, c0, 1	@ Read Auxiliary control register
	teq	r4, r9			@ Is it already set?
	mcrne	p15, 0, r9, c1, c0, 1	@ No, so write it
	mcr	p15, 0, r10, c1, c0, 2	@ Co-processor access control
	isb
	dsb
	mov	r0, r8			@ control register
	b	cpu_resume_mmu
ENDPROC(cpu_v7_do_resume)

#endif

#ifdef CONFIG_CPU_PJ4B
	/*
	 * PJ4B reuses the generic v7 entry points: each cpu_pj4b_* symbol
	 * is a global alias of its cpu_v7_* counterpart.  The one exception
	 * is do_idle when the erratum 4742 work-around is configured.
	 */
	globl_equ	cpu_pj4b_switch_mm,	cpu_v7_switch_mm
	globl_equ	cpu_pj4b_set_pte_ext,	cpu_v7_set_pte_ext
	globl_equ	cpu_pj4b_proc_init,	cpu_v7_proc_init
	globl_equ	cpu_pj4b_proc_fin,	cpu_v7_proc_fin
	globl_equ	cpu_pj4b_reset,		cpu_v7_reset
#ifdef CONFIG_PJ4B_ERRATA_4742
	/* Same as cpu_v7_do_idle, plus a barrier after the WFI. */
ENTRY(cpu_pj4b_do_idle)
	dsb					@ WFI may enter a low-power mode
	wfi
	dsb					@ barrier
	mov	pc, lr
ENDPROC(cpu_pj4b_do_idle)
#else
	globl_equ	cpu_pj4b_do_idle,	cpu_v7_do_idle
#endif
	globl_equ	cpu_pj4b_dcache_clean_area,	cpu_v7_dcache_clean_area
	globl_equ	cpu_pj4b_do_suspend,	cpu_v7_do_suspend
	globl_equ	cpu_pj4b_do_resume,	cpu_v7_do_resume
	globl_equ	cpu_pj4b_suspend_size,	cpu_v7_suspend_size

#endif

/*

*__v7_setup

*

*Initialise TLB, Caches, and MMU state ready to switch the MMU

*on. Return in r0 the new CP15 C1 control register setting.

*

*This should be able to cover all ARMv7 cores.

*

*It is assumed that:

*- cache type register is implemented

*/

/*
 * SMP-capable core entry points.  r10 is loaded with the extra aux
 * control bit to set together with the SMP/nAMP bit: bit 0 for
 * Cortex-A5/A9/R7, nothing extra for Cortex-A7/A15.  On CONFIG_SMP
 * kernels the aux control register is written only when bit 6 is not
 * already set.  Every path ends by branching to __v7_setup.
 */
__v7_ca5mp_setup:
__v7_ca9mp_setup:
__v7_cr7mp_setup:
	mov	r10, #(1 << 0)			@ Cache/TLB ops broadcasting
	b	1f
__v7_ca7mp_setup:
__v7_ca15mp_setup:
	mov	r10, #0
1:
#ifdef CONFIG_SMP
	ALT_SMP(mrc	p15, 0, r0, c1, c0, 1)
	ALT_UP(mov	r0, #(1 << 6))		@ fake it for UP
	tst	r0, #(1 << 6)			@ SMP/nAMP mode enabled?
	orreq	r0, r0, #(1 << 6)		@ Enable SMP/nAMP mode
	orreq	r0, r0, r10			@ Enable CPU-specific SMP bits
	mcreq	p15, 0, r0, c1, c0, 1
#endif
	b	__v7_setup

/*
 * Marvell PJ4B entry point.  With CONFIG_CPU_PJ4B the four auxiliary
 * control registers below are read, adjusted and written back through
 * r0; execution then falls through into __v7_setup.
 */
__v7_pj4b_setup:
#ifdef CONFIG_CPU_PJ4B

/* Auxiliary Debug Modes Control 1 Register */
#define PJ4B_STATIC_BP (1 << 2) /* Enable Static BP */
#define PJ4B_INTER_PARITY (1 << 8) /* Disable Internal Parity Handling */
#define PJ4B_CLEAN_LINE (1 << 16) /* Disable data transfer for clean line */

/* Auxiliary Debug Modes Control 2 Register */
#define PJ4B_FAST_LDR (1 << 23) /* Disable fast LDR */
#define PJ4B_SNOOP_DATA (1 << 25) /* Do not interleave write and snoop data */
#define PJ4B_CWF (1 << 27) /* Disable Critical Word First feature */
#define PJ4B_OUTSDNG_NC (1 << 29) /* Disable outstanding non cacheable rqst */
#define PJ4B_L1_REP_RR (1 << 30) /* L1 replacement - Strict round robin */
#define PJ4B_AUX_DBG_CTRL2 (PJ4B_SNOOP_DATA | PJ4B_CWF |\
			    PJ4B_OUTSDNG_NC | PJ4B_L1_REP_RR)

/* Auxiliary Functional Modes Control Register 0 */
#define PJ4B_SMP_CFB (1 << 1) /* Set SMP mode. Join the coherency fabric */
#define PJ4B_L1_PAR_CHK (1 << 2) /* Support L1 parity checking */
#define PJ4B_BROADCAST_CACHE (1 << 8) /* Broadcast Cache and TLB maintenance */

/* Auxiliary Debug Modes Control 0 Register */
#define PJ4B_WFI_WFE (1 << 22) /* WFI/WFE - serve the DVM and back to idle */

	/* Auxiliary Debug Modes Control 1 Register */
	mrc	p15, 1, r0, c15, c1, 1
	orr	r0, r0, #PJ4B_CLEAN_LINE
	orr	r0, r0, #PJ4B_INTER_PARITY
	bic	r0, r0, #PJ4B_STATIC_BP
	mcr	p15, 1, r0, c15, c1, 1

	/* Auxiliary Debug Modes Control 2 Register */
	mrc	p15, 1, r0, c15, c1, 2
	bic	r0, r0, #PJ4B_FAST_LDR
	orr	r0, r0, #PJ4B_AUX_DBG_CTRL2
	mcr	p15, 1, r0, c15, c1, 2

	/* Auxiliary Functional Modes Control Register 0 */
	mrc	p15, 1, r0, c15, c2, 0
#ifdef CONFIG_SMP
	orr	r0, r0, #PJ4B_SMP_CFB
#endif
	orr	r0, r0, #PJ4B_L1_PAR_CHK
	orr	r0, r0, #PJ4B_BROADCAST_CACHE
	mcr	p15, 1, r0, c15, c2, 0

	/* Auxiliary Debug Modes Control 0 Register */
	mrc	p15, 1, r0, c15, c1, 0
	orr	r0, r0, #PJ4B_WFI_WFE
	mcr	p15, 1, r0, c15, c1, 0

#endif /* CONFIG_CPU_PJ4B */

/*
 * Common ARMv7 setup.  Flushes the D-cache to the LoUIS, applies the
 * configured Cortex-A8/A9/A15 errata work-arounds selected by the main
 * ID register, invalidates the I-cache and TLBs, programs the TTB and
 * memory remap registers, and returns the new control register value
 * in r0 without writing it.
 */
__v7_setup:
	@ Preserve the live registers in __v7_setup_stack around the flush.
	adr	r12, __v7_setup_stack		@ the local stack
	stmia	r12, {r0-r5, r7, r9, r11, lr}
	bl      v7_flush_dcache_louis
	ldmia	r12, {r0-r5, r7, r9, r11, lr}

	@ Decode the main ID register: r5 = variant, r6 = variant:revision,
	@ r0 = primary part number (ARM Ltd. implementations only).
	mrc	p15, 0, r0, c0, c0, 0		@ read main ID register
	and	r10, r0, #0xff000000		@ ARM?
	teq	r10, #0x41000000
	bne	3f
	and	r5, r0, #0x00f00000		@ variant
	and	r6, r0, #0x0000000f		@ revision
	orr	r6, r6, r5, lsr #20-4		@ combine variant and revision
	ubfx	r0, r0, #4, #12			@ primary part number

	/* Cortex-A8 Errata */
	ldr	r10, =0x00000c08		@ Cortex-A8 primary part number
	teq	r0, r10
	bne	2f
#if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM)

	teq	r5, #0x00100000			@ only present in r1p*
	mrceq	p15, 0, r10, c1, c0, 1		@ read aux control register
	orreq	r10, r10, #(1 << 6)		@ set IBE to 1
	mcreq	p15, 0, r10, c1, c0, 1		@ write aux control register
#endif
#ifdef CONFIG_ARM_ERRATA_458693
	teq	r6, #0x20			@ only present in r2p0
	mrceq	p15, 0, r10, c1, c0, 1		@ read aux control register
	orreq	r10, r10, #(1 << 5)		@ set L1NEON to 1
	orreq	r10, r10, #(1 << 9)		@ set PLDNOP to 1
	mcreq	p15, 0, r10, c1, c0, 1		@ write aux control register
#endif
#ifdef CONFIG_ARM_ERRATA_460075
	teq	r6, #0x20			@ only present in r2p0
	mrceq	p15, 1, r10, c9, c0, 2		@ read L2 cache aux ctrl register
	tsteq	r10, #1 << 22
	orreq	r10, r10, #(1 << 22)		@ set the Write Allocate disable bit
	mcreq	p15, 1, r10, c9, c0, 2		@ write the L2 cache aux ctrl register
#endif
	b	3f

	/* Cortex-A9 Errata */
2:	ldr	r10, =0x00000c09		@ Cortex-A9 primary part number
	teq	r0, r10
	bne	3f
#ifdef CONFIG_ARM_ERRATA_742230
	cmp	r6, #0x22			@ only present up to r2p2
	mrcle	p15, 0, r10, c15, c0, 1		@ read diagnostic register
	orrle	r10, r10, #1 << 4		@ set bit #4
	mcrle	p15, 0, r10, c15, c0, 1		@ write diagnostic register
#endif
#ifdef CONFIG_ARM_ERRATA_742231
	teq	r6, #0x20			@ present in r2p0
	teqne	r6, #0x21			@ present in r2p1
	teqne	r6, #0x22			@ present in r2p2
	mrceq	p15, 0, r10, c15, c0, 1		@ read diagnostic register
	orreq	r10, r10, #1 << 12		@ set bit #12
	orreq	r10, r10, #1 << 22		@ set bit #22
	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
#endif
#ifdef CONFIG_ARM_ERRATA_743622
	teq	r5, #0x00200000			@ only present in r2p*
	mrceq	p15, 0, r10, c15, c0, 1		@ read diagnostic register
	orreq	r10, r10, #1 << 6		@ set bit #6
	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
#endif
#if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP)
	ALT_SMP(cmp r6, #0x30)			@ present prior to r3p0
	ALT_UP_B(1f)
	mrclt	p15, 0, r10, c15, c0, 1		@ read diagnostic register
	orrlt	r10, r10, #1 << 11		@ set bit #11
	mcrlt	p15, 0, r10, c15, c0, 1		@ write diagnostic register
1:
#endif

	/* Cortex-A15 Errata */
3:	ldr	r10, =0x00000c0f		@ Cortex-A15 primary part number
	teq	r0, r10
	bne	4f

#ifdef CONFIG_ARM_ERRATA_773022
	cmp	r6, #0x4			@ only present up to r0p4
	mrcle	p15, 0, r10, c1, c0, 1		@ read aux control register
	orrle	r10, r10, #1 << 1		@ disable loop buffer
	mcrle	p15, 0, r10, c1, c0, 1		@ write aux control register
#endif

	@ Invalidate caches and TLBs, then program the translation setup.
4:	mov	r10, #0
	mcr	p15, 0, r10, c7, c5, 0		@ I+BTB cache invalidate
#ifdef CONFIG_MMU
	mcr	p15, 0, r10, c8, c7, 0		@ invalidate I + D TLBs
	v7_ttb_setup r10, r4, r8, r5		@ TTBCR, TTBRx setup
	ldr	r5, =PRRR			@ PRRR
	ldr	r6, =NMRR			@ NMRR
	mcr	p15, 0, r5, c10, c2, 0		@ write PRRR
	mcr	p15, 0, r6, c10, c2, 1		@ write NMRR
#endif
	dsb					@ Complete invalidations
#ifndef CONFIG_ARM_THUMBEE
	@ ThumbEE is not configured: if the CPU implements it, zero TEEHBR
	@ and set TEECR bit 0 to stop userspace TEEHBR access.
	mrc	p15, 0, r0, c0, c1, 0		@ read ID_PFR0 for ThumbEE
	and	r0, r0, #(0xf << 12)		@ ThumbEE enabled field
	teq	r0, #(1 << 12)			@ check if ThumbEE is present
	bne	1f
	mov	r5, #0
	mcr	p14, 6, r5, c1, c0, 0		@ Initialize TEEHBR to 0
	mrc	p14, 6, r0, c0, c0, 0		@ load TEECR
	orr	r0, r0, #1			@ set the 1st bit in order to
	mcr	p14, 6, r0, c0, c0, 0		@ stop userspace TEEHBR access
1:
#endif
	@ v7_crval supplies two words: r5 = bits to clear, r6 = bits to set.
	adr	r5, v7_crval
	ldmia	r5, {r5, r6}
 ARM_BE8(orr	r6, r6, #1 << 25)		@ big-endian page tables
#ifdef CONFIG_SWP_EMULATE
	orr     r5, r5, #(1 << 10)              @ set SW bit in "clear"
	bic     r6, r6, #(1 << 10)              @ clear it in "mmuset"
#endif
   	mrc	p15, 0, r0, c1, c0, 0		@ read control register
	bic	r0, r0, r5			@ clear them
	orr	r0, r0, r6			@ set them
 THUMB(	orr	r0, r0, #1 << 30	)	@ Thumb exceptions
	mov	pc, lr				@ return to head.S:__ret
ENDPROC(__v7_setup)

/* Scratch area where __v7_setup parks registers around its cache flush. */
	.align	2
__v7_setup_stack:
	.space	4 * 11				@ room for 11 words

__INITDATA

	/*
	 * Define struct processor (see <asm/proc-fns.h> and proc-macros.S).
	 * Both tables share the v7 abort handlers and enable the suspend slots.
	 */
	define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
#ifdef CONFIG_CPU_PJ4B
	define_processor_functions pj4b, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
#endif

/* Architecture and ELF platform name strings used by the proc_info records. */
	.section ".rodata"

	string	cpu_arch_name, "armv7"
	string	cpu_elf_name, "v7"
	.align

.section ".proc.info.init", #alloc, #execinstr

/*

* Standard v7 proc info content

*/

.macro __v7_proc initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions

ALT_SMP(.longPMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \

PMD_SECT_AF | PMD_FLAGS_SMP | \mm_mmuflags)

ALT_UP(.longPMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \

PMD_SECT_AF | PMD_FLAGS_UP | \mm_mmuflags)

.longPMD_TYPE_SECT | PMD_SECT_AP_WRITE | \

PMD_SECT_AP_READ | PMD_SECT_AF | \io_mmuflags

W(b)\initfunc

.longcpu_arch_name

.longcpu_elf_name

.longHWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \

HWCAP_EDSP | HWCAP_TLS | \hwcaps

.longcpu_v7_name

.long\proc_fns

.longv7wbi_tlb_fns

.longv6_user_fns

.longv7_cache_fns

.endm

#ifndef CONFIG_ARM_LPAE
	/*
	 * ARM Ltd. Cortex A5 processor.
	 */
	.type   __v7_ca5mp_proc_info, #object
__v7_ca5mp_proc_info:
	.long	0x410fc050			@ Required ID value
	.long	0xff0ffff0			@ Mask for ID
	__v7_proc __v7_ca5mp_setup
	.size	__v7_ca5mp_proc_info, . - __v7_ca5mp_proc_info

	/*
	 * ARM Ltd. Cortex A9 processor.
	 */
	.type   __v7_ca9mp_proc_info, #object
__v7_ca9mp_proc_info:
	.long	0x410fc090			@ Required ID value
	.long	0xff0ffff0			@ Mask for ID
	__v7_proc __v7_ca9mp_setup
	.size	__v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info

#endif	/* CONFIG_ARM_LPAE */

/*

* Marvell PJ4B processor.

*/

#ifdef CONFIG_CPU_PJ4B

.type __v7_pj4b_proc_info, #object

__v7_pj4b_proc_info:

.long0x560f5800

.long0xff0fff00

__v7_proc __v7_pj4b_setup, proc_fns = pj4b_processor_functions

.size__v7_pj4b_proc_info, . - __v7_pj4b_proc_info

#endif

/*

* ARM Ltd. Cortex R7 processor.

*/

.type__v7_cr7mp_proc_info, #object

__v7_cr7mp_proc_info:

.long0x410fc170

.long0xff0ffff0

__v7_proc __v7_cr7mp_setup

.size__v7_cr7mp_proc_info, . - __v7_cr7mp_proc_info

/*

* ARM Ltd. Cortex A7 processor.

*/

.type__v7_ca7mp_proc_info, #object

__v7_ca7mp_proc_info:

.long0x410fc070

.long0xff0ffff0

__v7_proc __v7_ca7mp_setup

.size__v7_ca7mp_proc_info, . - __v7_ca7mp_proc_info

/*

* ARM Ltd. Cortex A15 processor.

*/

.type__v7_ca15mp_proc_info, #object

__v7_ca15mp_proc_info:

.long0x410fc0f0

.long0xff0ffff0

__v7_proc __v7_ca15mp_setup

.size__v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info

/*

* Qualcomm Inc. Krait processors.

*/

.type__krait_proc_info, #object

__krait_proc_info:

.long0x510f0400@ Required ID value

.long0xff0ffc00@ Mask for ID

/*

* Some Krait processors don't indicate support for SDIV and UDIV

* instructions in the ARM instruction set, even though they actually

* do support them.

*/

__v7_proc __v7_setup, hwcaps = HWCAP_IDIV

.size__krait_proc_info, . - __krait_proc_info

/*

* Match any ARMv7 processor core.

*/

.type__v7_proc_info, #object

__v7_proc_info:

.long0x000f0000@ Required ID value

.long0x000f0000@ Mask for ID

__v7_proc __v7_setup

.size__v7_proc_info, . - __v7_proc_info

更多推荐

arm linux s文件夹,armv7对应的CACHE操作相关文件解析