; Module header: exported symbols and the code section for the ARM memory helpers.
        MODULE  ARM_MEMORY

        PUBLIC  ARM_MEMCPY
        PUBLIC  ARM_MEMSET
        PUBLIC  ARM_MEMSET8
        PUBLIC  ARM_MEMSET16
        PUBLIC  ARM_MEMSET32

        ; NOTE(review): the alignment operand was empty in the damaged source;
        ; (2) = 2^2 = 4-byte alignment is what ARM-state code requires - confirm.
        SECTION .text:CODE:NOROOT(2)
        CODE32                                            ; All routines below are ARM (32-bit) state code

;-------------------------------------------------------------------------------
; void ARM_MEMCPY(void* pDest, void* pSrc, U32 NumBytes)
;
; Function description
;   Copy NumBytes bytes from pSrc to pDest. The copy always runs from low to
;   high addresses, so a destination that overlaps the source at a higher
;   address is not handled. The shift/merge path used for a mis-aligned
;   source assumes little-endian byte order.
;
; Strategy
;   1. Fewer than 4 bytes: byte transfers only.
;   2. Copy 1..3 bytes so that the destination becomes word aligned.
;   3. Source also word aligned: blocks of 32 bytes with LDM/STM, then the
;      remaining 16/8/4 bytes selected by individual bits of NumBytes.
;   4. Source not word aligned: read aligned source words and merge two
;      neighbours with shifts to build every destination word.
;   5. Copy the remaining 0..3 bytes.
;
; Register usage:
;
;   R0    pDest    (advanced while copying)
;   R1    pSrc     (advanced while copying)
;   R2    NumBytes (remaining count; may become negative, only its low bits
;                   are evaluated afterwards)
;
;   R3    Used for data transfers
;   R4    Used for data transfers (saved on the stack in the bulk path)
;   R12   Used for data transfers
;   R14   Used for data transfers (saved on the stack in the bulk path)
;
;   R13   SP
;   R14   LR (contains return address)
;   R15   PC
;
; NOTE(review): the immediate operands of this routine were missing in the
; damaged source and have been restored from the algorithm's invariants;
; confirm against the original vendor source.
;
;-------------------------------------------------------------------------------
ARM_MEMCPY:
;-------------------------------------------------------------------------------
        cmp         R2, #+3                               ; R2 = NumBytes
        bls         ARM_MEMCPY_HandleTrailingBytes        ; If we have less than one complete word, use single byte transfer

        ands        R12, R0, #+3                          ; R12 = mis-alignment of the destination address (0..3)
        beq         ARM_MEMCPY_DestIsDWordAligned         ; Is destination address already word aligned ?

;-------------------------------------------------------------------------------
; Handle as many bytes as necessary to align the destination address
;
; Mis-alignment 1 -> 3 bytes (LS and CC true)
; Mis-alignment 2 -> 2 bytes (LS true, CC false)
; Mis-alignment 3 -> 1 byte  (LS and CC false)
;
        ldrb        R3, [R1], #+1                         ; We need at least one byte to the next word alignment, so we read one.
        cmp         R12, #+2                              ; Set condition codes according to the mis-alignment
        add         R2, R2, R12                           ; Adjust NumBytes : 1, 2, 3 (flags are not touched)
        ldrbls      R12, [R1], #+1                        ; Lower or same (LS)? -> We need one or two more bytes to the next word aligned address
        strb        R3, [R0], #+1
        ldrbcc      R3, [R1], #+1                         ; Carry clear (CC)? -> We need one more byte
        strbls      R12, [R0], #+1
        sub         R2, R2, #+4                           ; Adjust NumBytes: in total NumBytes -= (4 - mis-alignment)
        strbcc      R3, [R0], #+1                         ; Now the destination address is word aligned

;-------------------------------------------------------------------------------
; Choose best way to transfer data
;
ARM_MEMCPY_DestIsDWordAligned:
        ands        R3, R1, #+3                           ; R3 = mis-alignment of the source address (0..3)
        beq         ARM_MEMCPY_HandleBulkWordData         ; If source and destination are aligned, use bulk word transfer

        subs        R2, R2, #+4
        bcc         ARM_MEMCPY_HandleTrailingBytes        ; If we have less than one complete word left, use single byte transfer
                                                          ; (the low 2 bits of NumBytes are unchanged by subtracting 4)

        ldr         R12, [R1, -R3]!                       ; Read first mis-aligned data word and word align source address
        cmp         R3, #+2
        beq         ARM_MEMCPY_Loop16BitShift             ; Source mis-aligned by 2 bytes

        bhi         ARM_MEMCPY_Loop24BitShift             ; Source mis-aligned by 3 bytes, otherwise fall through (1 byte)

;-------------------------------------------------------------------------------
; Handle data in units of word
;
; This is done by reading mis-aligned words from source address and
; shift them into the right alignment. After this the next data word
; will be read to complete the missing data part.
;
ARM_MEMCPY_Loop8BitShift:
        mov         R3, R12, LSR #+8                      ; Shift data word into right position
        ldr         R12, [R1, #+4]!                       ; Load next mis-aligned data word
        subs        R2, R2, #+4                           ; Decrement NumBytes
        orr         R3, R3, R12, LSL #+24                 ; Combine missing part of data to build full data word
        str         R3, [R0], #+4                         ; Store complete word
        bcs         ARM_MEMCPY_Loop8BitShift              ; Another complete word left ?

        add         R1, R1, #+1                           ; Adjust source address back to the next unread byte
        b           ARM_MEMCPY_HandleTrailingBytes        ; Handle trailing bytes

ARM_MEMCPY_Loop16BitShift:
        mov         R3, R12, LSR #+16                     ; Shift data word into right position
        ldr         R12, [R1, #+4]!                       ; Load next mis-aligned data word
        subs        R2, R2, #+4                           ; Decrement NumBytes
        orr         R3, R3, R12, LSL #+16                 ; Combine missing part of data to build full data word
        str         R3, [R0], #+4                         ; Store complete word
        bcs         ARM_MEMCPY_Loop16BitShift             ; Another complete word left ?

        add         R1, R1, #+2                           ; Adjust source address back to the next unread byte
        b           ARM_MEMCPY_HandleTrailingBytes        ; Handle trailing bytes

ARM_MEMCPY_Loop24BitShift:
        mov         R3, R12, LSR #+24                     ; Shift data word into right position
        ldr         R12, [R1, #+4]!                       ; Load next mis-aligned data word
        subs        R2, R2, #+4                           ; Decrement NumBytes
        orr         R3, R3, R12, LSL #+8                  ; Combine missing part of data to build full data word
        str         R3, [R0], #+4                         ; Store complete word
        bcs         ARM_MEMCPY_Loop24BitShift             ; Another complete word left ?

        add         R1, R1, #+3                           ; Adjust source address back to the next unread byte
        b           ARM_MEMCPY_HandleTrailingBytes        ; Handle trailing bytes

;-------------------------------------------------------------------------------
; Handle large bulk data in blocks of 8 words (32 bytes)
;
ARM_MEMCPY_HandleBulkWordData:
        subs        R2, R2, #+0x20
        stmdb       SP!, {R4, LR}                         ; R4 and LR are used as additional transfer registers (flags are not touched)
        bcc         ARM_MEMCPY_HandleTrailingWords        ; Less than 32 bytes left ?

ARM_MEMCPY_LoopHandleBulkWord:
        ldm         R1!, {R3, R4, R12, LR}                ; Transfer 16 bytes at once
        stm         R0!, {R3, R4, R12, LR}
        ldm         R1!, {R3, R4, R12, LR}                ; Transfer 16 bytes at once
        stm         R0!, {R3, R4, R12, LR}
        subs        R2, R2, #+0x20
        bcs         ARM_MEMCPY_LoopHandleBulkWord

;-------------------------------------------------------------------------------
; Handle trailing 7 words
;
ARM_MEMCPY_HandleTrailingWords:
        movs        R12, R2, LSL #28                      ; Shift NumBytes left to use C and N flag of CPSR to conditional load/store data

        ldmcs       R1!, {R3, R4, R12, LR}                ; C flag contains bit 4 of NumBytes (transfer 16 bytes if it is set)
        stmcs       R0!, {R3, R4, R12, LR}
        ldmmi       R1!, {R3, R4}                         ; N flag contains bit 3 of NumBytes (transfer 8 bytes if it is set)
        stmmi       R0!, {R3, R4}

        movs        R12, R2, LSL #+30                     ; Shift NumBytes left to use C and N flag of CPSR to conditional load/store data

        ldmia       SP!, {R4, LR}                         ; Restore saved registers (flags are not touched)
        ldrcs       R3, [R1], #+4                         ; C flag contains bit 2 of NumBytes (transfer 4 bytes if it is set)
        strcs       R3, [R0], #+4
        bxeq        LR                                    ; Z flag set: bits 1 and 0 of NumBytes are clear, no trailing bytes

;-------------------------------------------------------------------------------
; Handle trailing 3 bytes
;
; Flags after "movs Rd, Rm, LSL #31":
;   N = bit[31] of the result = bit 0 of NumBytes
;   C = last bit shifted out  = bit 1 of NumBytes
;   (for comparison: ADD/CMN set C on carry, SUB/CMP set C when no borrow occurs)
;
;   xxxxxxxxxxxxxxxxxxxx10 << 31 : N=0, C=1
;   xxxxxxxxxxxxxxxxxxxx01 << 31 : N=1, C=0
;
;   MI : N=1
;   CS : C=1
;
ARM_MEMCPY_HandleTrailingBytes:
        movs        R2, R2, LSL #+31                      ; Shift NumBytes left to use C and N flag of CPSR to conditional load/store data

        ldrbmi      R2, [R1], #+1                         ; N flag contains bit 0 of NumBytes (transfer 1 byte if it is set)
        ldrbcs      R3, [R1], #+1                         ; C flag contains bit 1 of NumBytes (transfer 2 bytes if it is set)
        ldrbcs      R12, [R1], #+1
        strbmi      R2, [R0], #+1
        strbcs      R3, [R0], #+1
        strbcs      R12, [R0], #+1
        bx          LR

;-------------------------------------------------------------------------------
; void ARM_MEMSET(void* pDest, U32 c, U32 NumBytes)
;
; Function description
;   Fill NumBytes bytes starting at pDest with the byte value c.
;   The low byte of c is replicated into all four bytes of R1 by OR-ing
;   shifted copies, so c is expected to fit into 8 bits; higher bits of c
;   would be merged into the fill pattern.
;
; Strategy
;   1. Fewer than 4 bytes: byte stores only.
;   2. Store 1..3 bytes so that the destination becomes word aligned.
;   3. Store blocks of 32 bytes with STM, then the remaining 16/8/4 bytes
;      selected by individual bits of NumBytes.
;   4. Store the remaining 0..3 bytes.
;
; Register usage:
;
;   R0    pDest    (advanced while filling)
;   R1    c        (expanded to a 32-bit fill pattern)
;   R2    NumBytes (remaining count; may become negative, only its low bits
;                   are evaluated afterwards)
;
;   R3    Used for data transfers (also holds the mis-alignment at the start)
;   R4    Used for data transfers (saved on the stack)
;   R5    Used for data transfers (saved on the stack)
;   R6    Scratch destination for the flag-setting shifts (saved on the stack)
;
;   R13   SP
;   R14   LR (contains return address)
;   R15   PC
;
; NOTE(review): the immediate operands of this routine were missing in the
; damaged source and have been restored from the algorithm's invariants;
; confirm against the original vendor source.
;
;-------------------------------------------------------------------------------
ARM_MEMSET:
;-------------------------------------------------------------------------------
        orr         R1, R1, R1, LSL #+8                   ; 0x000000cc -> 0x0000cccc
        orr         R1, R1, R1, LSL #+16                  ; 0x0000cccc -> 0xcccccccc

        cmp         R2, #+3                               ; R2 = NumBytes
        bls         ARM_MEMSET_HandleTrailingBytes        ; If we have less than one complete word, use single byte transfer

        ands        R3, R0, #+3                           ; R3 = mis-alignment of the destination address (0..3)
        beq         ARM_MEMSET_DestIsAligned              ; Is destination address already word aligned ?

;-------------------------------------------------------------------------------
; Handle as many bytes as necessary to align the destination address
;
; Mis-alignment 1 -> 3 bytes (LS and CC true)
; Mis-alignment 2 -> 2 bytes (LS true, CC false)
; Mis-alignment 3 -> 1 byte  (LS and CC false)
;
        strb        R1, [R0], #+1                         ; We need at least one byte to the next word alignment, so we write one.
        cmp         R3, #+2                               ; Set condition codes according to the mis-alignment
        add         R2, R2, R3                            ; Adjust NumBytes (flags are not touched)
        strbls      R1, [R0], #+1                         ; Lower or same (LS)? -> We need one or two more bytes to the next word aligned address
        sub         R2, R2, #+4                           ; Adjust NumBytes: in total NumBytes -= (4 - mis-alignment)
        strbcc      R1, [R0], #+1                         ; Carry clear (CC)? -> We need one more byte

;-------------------------------------------------------------------------------
; Destination is aligned, use bulk word transfer
;
ARM_MEMSET_DestIsAligned:

;-------------------------------------------------------------------------------
; Handle large bulk data in blocks of 8 words (32 bytes)
;
ARM_MEMSET_HandleBulkWordData:
        stmdb       SP!, {R4, R5, R6}

        mov         R3, R1                                ; Four registers with the fill pattern: transfer 16 bytes at once
        mov         R4, R1
        mov         R5, R1

        subs        R2, R2, #+0x20                        ; 32 Bytes = 8 DWords
        bcc         ARM_MEMSET_HandleTrailingWords        ; Less than 32 bytes left ?

ARM_MEMSET_LoopHandleBulkWord:
        stm         R0!, {R1, R3, R4, R5}
        stm         R0!, {R1, R3, R4, R5}
        subs        R2, R2, #+0x20
        bcs         ARM_MEMSET_LoopHandleBulkWord

;-------------------------------------------------------------------------------
; Handle trailing 7 words
;
ARM_MEMSET_HandleTrailingWords:
        movs        R6, R2, LSL #28                       ; Shift NumBytes left to use C and N flag of CPSR to conditional load/store data
        stmcs       R0!, {R1, R3, R4, R5}                 ; C flag contains bit 4 of NumBytes (transfer 16 bytes if it is set)
        stmmi       R0!, {R1, R3}                         ; N flag contains bit 3 of NumBytes (transfer 8 bytes if it is set)

        movs        R6, R2, LSL #+30                      ; Shift NumBytes left to use C and N flag of CPSR to conditional load/store data
        strcs       R1, [R0], #+4                         ; C flag contains bit 2 of NumBytes (transfer 4 bytes if it is set)

        ldmia       SP!, {R4, R5, R6}                     ; Restore saved registers (flags are not touched)
        bxeq        LR                                    ; Z flag set: bits 1 and 0 of NumBytes are clear, no trailing bytes

;-------------------------------------------------------------------------------
; Handle trailing 3 bytes
;
ARM_MEMSET_HandleTrailingBytes:
        movs        R2, R2, LSL #+31                      ; Shift NumBytes left to use C and N flag of CPSR to conditional load/store data
        strbmi      R1, [R0], #+1                         ; N flag contains bit 0 of NumBytes (transfer 1 byte if it is set)
        strbcs      R1, [R0], #+1                         ; C flag contains bit 1 of NumBytes (transfer 2 bytes if it is set)
        strbcs      R1, [R0], #+1
        bx          LR

; int ARM_MEMSET8(void* pDest, U32 c, U32 NumBytes);
;-------------------------------------------------------------------------------
; ARM_MEMSET8
;
; Function description
;   Fill NumBytes bytes starting at pDest with the low byte of c.
;   The pattern is widened to 16 and then 32 bits by OR-ing shifted copies,
;   so c is expected to fit into 8 bits.
;
; Register usage:
;
;   R0    pDest    (advanced while filling; no explicit return value is set)
;   R1    c        (expanded to a 32-bit fill pattern)
;   R2    NumBytes (compared as a signed value; counts >= 0x80000000 are
;                   not supported)
;   R3    Copy of the fill pattern
;   R4    Copy of the fill pattern (saved on the stack)
;   R5    Copy of the fill pattern (saved on the stack)
;
; NOTE(review): the immediate operands of this routine were missing in the
; damaged source and have been restored from the alignment comments and the
; loop structure; confirm against the original source.
;
;-------------------------------------------------------------------------------
ARM_MEMSET8:
;-------------------------------------------------------------------------------
        stmdb       SP!, {R4, R5}
        cmp         R2, #4
        blt         ARM_MEMSET8_loop3                     ; Less than 4 bytes: byte stores only

        ; Alignment is unknown
        tst         R0, #1
        strneb      R1, [R0], #1
        subne       R2, R2, #1

        ; Now we are 16-bit aligned (need to upgrade 'c' to 16-bit)
        orr         R1, R1, R1, LSL #8
        tst         R0, #2
        strneh      R1, [R0], #2
        subne       R2, R2, #2

        ; Now we are 32-bit aligned (need to upgrade 'c' to 32-bit)
        orr         R1, R1, R1, LSL #16
        mov         R3, R1
        cmp         R2, #16
        blt         ARM_MEMSET8_loop2                     ; Less than 16 bytes left: skip the 128-bit alignment and the bulk loop

        tst         R0, #4
        strne       R1, [R0], #4
        subne       R2, R2, #4

        ; Now we are 64-bit aligned
        tst         R0, #8
        stmneia     R0!, {R1, R3}
        subne       R2, R2, #8

        ; Now we are 128-bit aligned
        mov         R4, R1
        mov         R5, R1

ARM_MEMSET8_loop1:
        ; Copy 4 32-bit values per loop iteration
        subs        R2, R2, #16
        stmgeia     R0!, {R1, R3, R4, R5}
        bge         ARM_MEMSET8_loop1
        add         R2, R2, #16                           ; Undo the last subtraction: R2 = 0..15 bytes left

ARM_MEMSET8_loop2:
        ; Copy up to 3 remaining 32-bit values
        tst         R2, #8
        stmneia     R0!, {R1, R3}
        tst         R2, #4
        strne       R1, [R0], #4
        and         R2, R2, #3                            ; R2 = 0..3 bytes left

ARM_MEMSET8_loop3:
        ; Copy up to 3 remaining bytes
        subs        R2, R2, #1
        strgeb      R1, [R0], #1
        subs        R2, R2, #1
        strgeb      R1, [R0], #1
        subs        R2, R2, #1
        strgeb      R1, [R0], #1
        ldmia       SP!, {R4, R5}
        bx          LR

; int ARM_MEMSET16(void* pDest, U32 c, U32 NumHalfWords);
;-------------------------------------------------------------------------------
; ARM_MEMSET16
;
; Function description
;   Fill NumHalfWords 16-bit units starting at pDest with the low half-word
;   of c. pDest has to be at least 16-bit aligned. The pattern is widened to
;   32 bits by OR-ing a shifted copy, so c is expected to fit into 16 bits.
;
; Register usage:
;
;   R0    pDest        (advanced while filling; no explicit return value is set)
;   R1    c            (expanded to a 32-bit fill pattern)
;   R2    NumHalfWords (compared as a signed value; counts >= 0x80000000
;                       are not supported)
;   R3    Copy of the fill pattern
;   R4    Copy of the fill pattern (saved on the stack)
;   R5    Copy of the fill pattern (saved on the stack)
;
; NOTE(review): the immediate operands of this routine were missing in the
; damaged source and have been restored from the alignment comments and the
; loop structure; confirm against the original source.
;
;-------------------------------------------------------------------------------
ARM_MEMSET16:
;-------------------------------------------------------------------------------
        stmdb       SP!, {R4, R5}

        cmp         R2, #2
        blt         ARM_MEMSET16_HandleTrailingHalfWord   ; 1 or 0

        ; Alignment is known to be at least 16-bit
        tst         R0, #2
        strneh      R1, [R0], #2                          ; xxxx-xx10 --->
        subne       R2, R2, #1                            ; xxxx-xx00

        ; Now we are 32-bit aligned (need to upgrade 'c' to 32-bit)
        orr         R1, R1, R1, LSL #16
        mov         R4, R1

        cmp         R2, #8
        blt         ARM_MEMSET16_HandleTrailingWords      ; 7, 6, ... 0

        tst         R0, #4
        strne       R1, [R0], #4                          ; xxxx-x100 --->
        subne       R2, R2, #2                            ; xxxx-x000 --->

        ; Now we are 64-bit aligned
        tst         R0, #8
        stmneia     R0!, {R1, R4}                         ; xxxx-1000 --->
        subne       R2, R2, #4                            ; xxxx-0000 --->

ARM_MEMSET16_HandleBulkWordData:
        ; Now we are 128-bit aligned
        mov         R5, R1
        mov         R3, R1

ARM_MEMSET16_LoopHandleBulkWord:
        ; Copy 4 32-bit values (8 half-words) per loop iteration
        subs        R2, R2, #8
        stmgeia     R0!, {R1, R3, R4, R5}
        bge         ARM_MEMSET16_LoopHandleBulkWord
        add         R2, R2, #8                            ; Undo the last subtraction: R2 = 0..7 half-words left

ARM_MEMSET16_HandleTrailingWords:
        ; Copy up to 3 remaining 32-bit values
        tst         R2, #4
        stmneia     R0!, {R1, R4}

        tst         R2, #2
        strne       R1, [R0], #4

        and         R2, R2, #1                            ; R2 = 0..1 half-words left

ARM_MEMSET16_HandleTrailingHalfWord:
        ; Copy up to 1 remaining 16-bit value
        subs        R2, R2, #1
        strgeh      R1, [R0], #2

        ldmia       SP!, {R4, R5}
        bx          LR

; int ARM_MEMSET32(void* pDest, U32 c, U32 NumWords);
;-------------------------------------------------------------------------------
; ARM_MEMSET32
;
; Function description
;   Fill NumWords 32-bit words starting at pDest with the value c.
;   pDest has to be at least 32-bit aligned.
;
; Register usage:
;
;   R0    pDest    (advanced while filling; no explicit return value is set)
;   R1    c        (fill pattern)
;   R2    NumWords (compared as a signed value; counts >= 0x80000000 are
;                   not supported)
;   R3    Copy of the fill pattern
;   R4    Copy of the fill pattern (saved on the stack)
;   R5    Copy of the fill pattern (saved on the stack)
;
; NOTE(review): the immediate operands of this routine were missing in the
; damaged source and have been restored from the alignment comments and the
; loop structure; confirm against the original source.
;
;-------------------------------------------------------------------------------
ARM_MEMSET32:
;-------------------------------------------------------------------------------
        stmdb       SP!, {R4, R5}

        cmp         R2, #4
        blt         ARM_MEMSET32_loop2                    ; Less than 4 words: single word stores only

        ; Alignment is known to be at least 32-bit
        mov         R3, R1

        tst         R0, #4
        strne       R1, [R0], #4
        subne       R2, R2, #1

        ; Now we are 64-bit aligned
        tst         R0, #8
        stmneia     R0!, {R1, R3}
        subne       R2, R2, #2

        ; Now we are 128-bit aligned
        mov         R4, R1
        mov         R5, R1

ARM_MEMSET32_loop1:
        ; Copy 4 32-bit values per loop iteration
        subs        R2, R2, #4
        stmgeia     R0!, {R1, R3, R4, R5}
        bge         ARM_MEMSET32_loop1
        add         R2, R2, #4                            ; Undo the last subtraction: R2 = 0..3 words left

ARM_MEMSET32_loop2:
        ; Copy up to 3 remaining 32-bit values
        subs        R2, R2, #1
        strge       R1, [R0], #4
        subs        R2, R2, #1
        strge       R1, [R0], #4
        subs        R2, R2, #1
        strge       R1, [R0], #4

        ldmia       SP!, {R4, R5}
        bx          LR

;-__arm void ARM_memxor(void* pDest, U32 c, U32 NumBytes);
; Arguments: R0 = pDest, R1 = c, R2 = NumBytes
;
; Function description
;   XOR NumBytes bytes starting at pDest in place with the byte value c.
;   The low byte of c is replicated into all four bytes of R1 by OR-ing
;   shifted copies, so c is expected to fit into 8 bits.
;
; Register usage:
;
;   R0    pDest    (advanced while processing)
;   R1    c        (expanded to a 32-bit XOR pattern)
;   R2    NumBytes (remaining count; also reused as data register for the
;                   trailing bytes once its low bits are held in the flags)
;   R3    Mis-alignment at the start, afterwards data
;   R12   Data (alignment prologue)
;   R4-R6 Data (saved on the stack)
;   R7    Scratch destination for the flag-setting shifts (saved on the stack)
;
; Every read-modify-write loads WITHOUT advancing R0 and lets the store do
; the post-increment, so each location is read and written exactly once.
;
; NOTE(review): the immediate operands of this routine were missing in the
; damaged source and have been restored from the algorithm's invariants.
; The byte loads of the alignment prologue carried an (empty) post-index
; operand; they are written without write-back here, because advancing R0 on
; both the load and the store would write the result one byte too far.
; The trailing bytes are now read with byte loads instead of word loads, which
; avoids unaligned word accesses and reads beyond the end of the buffer.
;
;-------------------------------------------------------------------------------
arm_memxor:
;-------------------------------------------------------------------------------
        orr         R1, R1, R1, LSL #+8                   ; 0x000000cc -> 0x0000cccc
        orr         R1, R1, R1, LSL #+16                  ; 0x0000cccc -> 0xcccccccc

        cmp         R2, #+3                               ; R2 = NumBytes
        bls         arm_memxor_HandleTrailingBytes        ; If we have less than one complete word, use single byte transfer

        ands        R3, R0, #+3                           ; R3 = mis-alignment of the destination address (0..3)
        beq         arm_memxor_DestIsAligned              ; Is destination address already word aligned ?

;-------------------------------------------------------------------------------
; Handle as many bytes as necessary to align the destination address
;
; Mis-alignment 1 -> 3 bytes (LS and CC true)
; Mis-alignment 2 -> 2 bytes (LS true, CC false)
; Mis-alignment 3 -> 1 byte  (LS and CC false)
;
        ldrb        R12, [R0]                             ; We need at least one byte to the next word alignment, so we handle one.
        eor         R12, R12, R1
        strb        R12, [R0], #+1

        cmp         R3, #+2                               ; Set condition codes according to the mis-alignment
        add         R2, R2, R3                            ; Adjust NumBytes (R3 may be overwritten from here on, flags are kept)

        ldrbls      R3, [R0]                              ; Lower or same (LS)? -> We need one or two more bytes to the next word aligned address
        eorls       R3, R3, R1
        strbls      R3, [R0], #+1

        sub         R2, R2, #+4                           ; Adjust NumBytes: in total NumBytes -= (4 - mis-alignment)

        ldrbcc      R3, [R0]                              ; Carry clear (CC)? -> We need one more byte
        eorcc       R3, R3, R1
        strbcc      R3, [R0], #+1

;-------------------------------------------------------------------------------
; Destination is aligned, use bulk word transfer
;
arm_memxor_DestIsAligned:

;-------------------------------------------------------------------------------
; Handle large bulk data in blocks of 8 words (32 bytes)
;
arm_memxor_HandleBulkWordData:
        stmdb       SP!, {R4, R5, R6, R7}

        subs        R2, R2, #+0x20                        ; 32 Bytes = 8 DWords
        bcc         arm_memxor_HandleTrailingWords        ; Less than 32 bytes left ?

arm_memxor_LoopHandleBulkWord:
        ldm         R0, {R3, R4, R5, R6}                  ; First 16 bytes of the block
        eor         R3, R3, R1
        eor         R4, R4, R1
        eor         R5, R5, R1
        eor         R6, R6, R1
        stm         R0!, {R3, R4, R5, R6}

        ldm         R0, {R3, R4, R5, R6}                  ; Second 16 bytes of the block
        eor         R3, R3, R1
        eor         R4, R4, R1
        eor         R5, R5, R1
        eor         R6, R6, R1
        stm         R0!, {R3, R4, R5, R6}

        subs        R2, R2, #+0x20
        bcs         arm_memxor_LoopHandleBulkWord

;-------------------------------------------------------------------------------
; Handle trailing 7 words
;
arm_memxor_HandleTrailingWords:
        movs        R7, R2, LSL #28                       ; Shift NumBytes left to use C and N flag of CPSR to conditional load/store data

        ldmcs       R0, {R3, R4, R5, R6}                  ; C flag contains bit 4 of NumBytes (process 16 bytes if it is set)
        eorcs       R3, R3, R1
        eorcs       R4, R4, R1
        eorcs       R5, R5, R1
        eorcs       R6, R6, R1
        stmcs       R0!, {R3, R4, R5, R6}

        ldmmi       R0, {R3, R4}                          ; N flag contains bit 3 of NumBytes (process 8 bytes if it is set)
        eormi       R3, R3, R1
        eormi       R4, R4, R1
        stmmi       R0!, {R3, R4}

        movs        R7, R2, LSL #+30                      ; Shift NumBytes left to use C and N flag of CPSR to conditional load/store data

        ldrcs       R3, [R0]                              ; C flag contains bit 2 of NumBytes (process 4 bytes if it is set)
        eorcs       R3, R3, R1
        strcs       R3, [R0], #+4

        ldmia       SP!, {R4, R5, R6, R7}                 ; Restore saved registers (flags are not touched)
        bxeq        LR                                    ; Z flag set: bits 1 and 0 of NumBytes are clear, no trailing bytes

;-------------------------------------------------------------------------------
; Handle trailing 3 bytes
;
arm_memxor_HandleTrailingBytes:
        movs        R2, R2, LSL #+31                      ; Shift NumBytes left to use C and N flag of CPSR to conditional load/store data

        ldrbmi      R2, [R0]                              ; N flag contains bit 0 of NumBytes (process 1 byte if it is set)
        eormi       R2, R2, R1
        strbmi      R2, [R0], #+1

        ldrbcs      R2, [R0]                              ; C flag contains bit 1 of NumBytes (process 2 bytes if it is set)
        eorcs       R2, R2, R1
        strbcs      R2, [R0], #+1

        ldrbcs      R2, [R0]
        eorcs       R2, R2, R1
        strbcs      R2, [R0], #+1

        bx          LR

;-__arm int arm_memxor8(void* pDest, U32 c, U32 NumBytes);
; Arguments: R0 = pDest, R1 = c, R2 = NumBytes
;
; Function description
;   XOR NumBytes bytes starting at pDest in place with the low byte of c.
;   The low byte of c is replicated into all four bytes of R1 by OR-ing
;   shifted copies, so c is expected to fit into 8 bits.
;
; Register usage:
;
;   R0    pDest    (advanced while processing; no explicit return value is set)
;   R1    c        (expanded to a 32-bit XOR pattern)
;   R2    NumBytes (compared as a signed value; counts >= 0x80000000 are
;                   not supported)
;   R3    Data
;   R4-R6 Data (saved on the stack)
;
; Every read-modify-write loads without write-back and lets the store do the
; post-increment of R0.
;
; The last two trailing byte stores now write the XOR result (R3); they used
; to write the pattern register R1, which overwrote the data with c instead
; of XOR-ing it.
;
; NOTE(review): the immediate operands of this routine were missing in the
; damaged source and have been restored from the alignment comments and the
; loop structure; confirm against the original source.
;
;-------------------------------------------------------------------------------
arm_memxor8:
;-------------------------------------------------------------------------------
        stmdb       SP!, {R4, R5, R6}

        orr         R1, R1, R1, LSL #+8                   ; 0x000000cc -> 0x0000cccc
        orr         R1, R1, R1, LSL #+16                  ; 0x0000cccc -> 0xcccccccc

        cmp         R2, #4
        blt         arm_memxor8_loop3                     ; Less than 4 bytes: byte transfers only

        ; Alignment is unknown
        tst         R0, #1
        ldrneb      R6, [R0]
        eorne       R6, R6, R1
        strneb      R6, [R0], #1
        subne       R2, R2, #1

        ; Now we are 16-bit aligned
        tst         R0, #2
        ldrneh      R6, [R0]
        eorne       R6, R6, R1
        strneh      R6, [R0], #2
        subne       R2, R2, #2

        ; Now we are 32-bit aligned
        cmp         R2, #16
        blt         arm_memxor8_loop2                     ; Less than 16 bytes left: skip the 128-bit alignment and the bulk loop

        tst         R0, #4
        ldrne       R6, [R0]
        eorne       R6, R6, R1
        strne       R6, [R0], #4
        subne       R2, R2, #4

        ; Now we are 64-bit aligned
        tst         R0, #8
        ldmneia     R0, {R3, R6}
        eorne       R3, R3, R1
        eorne       R6, R6, R1
        stmneia     R0!, {R3, R6}
        subne       R2, R2, #8

        ; Now we are 128-bit aligned

arm_memxor8_loop1:
        ; Process 4 32-bit values per loop iteration
        subs        R2, R2, #16
        ldmgeia     R0, {R3, R4, R5, R6}
        eorge       R3, R3, R1
        eorge       R4, R4, R1
        eorge       R5, R5, R1
        eorge       R6, R6, R1
        stmgeia     R0!, {R3, R4, R5, R6}
        bge         arm_memxor8_loop1
        add         R2, R2, #16                           ; Undo the last subtraction: R2 = 0..15 bytes left

arm_memxor8_loop2:
        ; Process up to 3 remaining 32-bit values
        tst         R2, #8
        ldmneia     R0, {R3, R4}
        eorne       R3, R3, R1
        eorne       R4, R4, R1
        stmneia     R0!, {R3, R4}

        tst         R2, #4
        ldrne       R3, [R0]
        eorne       R3, R3, R1
        strne       R3, [R0], #4

        and         R2, R2, #3                            ; R2 = 0..3 bytes left

arm_memxor8_loop3:
        ; Process up to 3 remaining bytes
        subs        R2, R2, #1
        ldrgeb      R3, [R0]
        eorge       R3, R3, R1
        strgeb      R3, [R0], #1

        subs        R2, R2, #1
        ldrgeb      R3, [R0]
        eorge       R3, R3, R1
        strgeb      R3, [R0], #1                          ; Store the XOR result, not the pattern

        subs        R2, R2, #1
        ldrgeb      R3, [R0]
        eorge       R3, R3, R1
        strgeb      R3, [R0], #1                          ; Store the XOR result, not the pattern

        ldmia       SP!, {R4, R5, R6}
        bx          LR

;-__arm int arm_memxor16(void* pDest, U32 c, U32 NumHalfWords);
; Arguments: R0 = pDest, R1 = c, R2 = NumHalfWords
;
; Function description
;   XOR NumHalfWords 16-bit units starting at pDest in place with the low
;   half-word of c. pDest has to be at least 16-bit aligned. The pattern is
;   widened to 32 bits by OR-ing a shifted copy, so c is expected to fit
;   into 16 bits.
;
; Register usage:
;
;   R0    pDest        (advanced while processing; no explicit return value is set)
;   R1    c            (expanded to a 32-bit XOR pattern)
;   R2    NumHalfWords (compared as a signed value; counts >= 0x80000000
;                       are not supported)
;   R3    Data
;   R4-R6 Data (saved on the stack)
;
; Every read-modify-write loads without write-back and lets the store do the
; post-increment of R0.
;
; NOTE(review): the immediate operands of this routine were missing in the
; damaged source and have been restored from the alignment comments and the
; loop structure; confirm against the original source.
;
;-------------------------------------------------------------------------------
arm_memxor16:
;-------------------------------------------------------------------------------
        stmdb       SP!, {R4, R5, R6}
        orr         R1, R1, R1, LSL #+16                  ; 0x0000cccc -> 0xcccccccc

        cmp         R2, #2
        blt         arm_memxor16_HandleTrailingHalfWord   ; 1 or 0

        ; Alignment is known to be at least 16-bit
        tst         R0, #2
        ldrneh      R6, [R0]
        eorne       R6, R6, R1
        strneh      R6, [R0], #2                          ; xxxx-xx10 --->
        subne       R2, R2, #1                            ; xxxx-xx00

        ; Now we are 32-bit aligned
        cmp         R2, #8
        blt         arm_memxor16_HandleTrailingWords      ; 7, 6, ... 0

        tst         R0, #4
        ldrne       R3, [R0]
        eorne       R3, R3, R1
        strne       R3, [R0], #4                          ; xxxx-x100 --->
        subne       R2, R2, #2                            ; xxxx-x000 --->

        ; Now we are 64-bit aligned
        tst         R0, #8
        ldmneia     R0, {R3, R4}
        eorne       R3, R3, R1
        eorne       R4, R4, R1
        stmneia     R0!, {R3, R4}                         ; xxxx-1000 --->
        subne       R2, R2, #4                            ; xxxx-0000 --->

arm_memxor16_HandleBulkWordData:
        ; Now we are 128-bit aligned

arm_memxor16_LoopHandleBulkWord:
        ; Process 4 32-bit values (8 half-words) per loop iteration
        subs        R2, R2, #8
        ldmgeia     R0, {R3, R4, R5, R6}
        eorge       R3, R3, R1
        eorge       R4, R4, R1
        eorge       R5, R5, R1
        eorge       R6, R6, R1
        stmgeia     R0!, {R3, R4, R5, R6}
        bge         arm_memxor16_LoopHandleBulkWord
        add         R2, R2, #8                            ; Undo the last subtraction: R2 = 0..7 half-words left

arm_memxor16_HandleTrailingWords:
        ; Process up to 3 remaining 32-bit values
        tst         R2, #4
        ldmneia     R0, {R3, R4}
        eorne       R3, R3, R1
        eorne       R4, R4, R1
        stmneia     R0!, {R3, R4}

        tst         R2, #2
        ldrne       R3, [R0]
        eorne       R3, R3, R1
        strne       R3, [R0], #4

        and         R2, R2, #1                            ; R2 = 0..1 half-words left

arm_memxor16_HandleTrailingHalfWord:
        ; Process up to 1 remaining 16-bit value
        subs        R2, R2, #1
        ldrgeh      R3, [R0]
        eorge       R3, R3, R1
        strgeh      R3, [R0], #2

        ldmia       SP!, {R4, R5, R6}
        bx          LR

;-__arm int arm_memxor32(void* pDest, U32 c, U32 NumWords);
; Arguments: R0 = pDest, R1 = c, R2 = NumWords
;
; Function description
;   XOR NumWords 32-bit words starting at pDest in place with the value c.
;   pDest has to be at least 32-bit aligned.
;
; Register usage:
;
;   R0    pDest    (advanced while processing; no explicit return value is set)
;   R1    c        (XOR pattern)
;   R2    NumWords (compared as a signed value; counts >= 0x80000000 are
;                   not supported)
;   R3    Data
;   R4-R6 Data (saved on the stack)
;
; Every read-modify-write loads without write-back and lets the store do the
; post-increment of R0.
;
; NOTE(review): the immediate operands of this routine were missing in the
; damaged source and have been restored from the alignment comments and the
; loop structure; confirm against the original source.
;
;-------------------------------------------------------------------------------
arm_memxor32:
;-------------------------------------------------------------------------------
        stmdb       SP!, {R4, R5, R6}

        cmp         R2, #4
        blt         arm_memxor32_loop2                    ; Less than 4 words: single word transfers only

        ; Alignment is known to be at least 32-bit, is it 64-bit aligned ?
        tst         R0, #4
        ; No, it is 32-bit aligned
        ldrne       R3, [R0]
        eorne       R3, R3, R1
        strne       R3, [R0], #4
        subne       R2, R2, #1

        ; Now we are 64-bit aligned, is it 128-bit aligned ?
        tst         R0, #8
        ; No, it is 64-bit aligned
        ldmneia     R0, {R3, R4}
        eorne       R3, R3, R1
        eorne       R4, R4, R1
        stmneia     R0!, {R3, R4}                         ; xxxx-1000 --->
        subne       R2, R2, #2

        ; Now we are 128-bit aligned

arm_memxor32_loop1:
        ; Process 4 32-bit values per loop iteration
        subs        R2, R2, #4
        ldmgeia     R0, {R3, R4, R5, R6}
        eorge       R3, R3, R1
        eorge       R4, R4, R1
        eorge       R5, R5, R1
        eorge       R6, R6, R1
        stmgeia     R0!, {R3, R4, R5, R6}
        bge         arm_memxor32_loop1
        add         R2, R2, #4                            ; Undo the last subtraction: R2 = 0..3 words left

arm_memxor32_loop2:
        ; Process up to 3 remaining 32-bit values
        subs        R2, R2, #1
        ldrge       R3, [R0]
        eorge       R3, R3, R1
        strge       R3, [R0], #4

        subs        R2, R2, #1
        ldrge       R3, [R0]
        eorge       R3, R3, R1
        strge       R3, [R0], #4

        subs        R2, R2, #1
        ldrge       R3, [R0]
        eorge       R3, R3, R1
        strge       R3, [R0], #4

        ldmia       SP!, {R4, R5, R6}
        bx          LR

        END

ARM Memory Copy的更多相关文章

  1. Android笔记:C memory copy

    socket通讯问题之一: 在c中按字节发送数据  比如设备1状态(1字节)值(1字节)设备2状态(1字节)值(1字节)....这种格式拆分的问题 在c中可以利用struct的 memory copy ...

  2. 【ARM-Linux开发】Linux内存管理:ARM Memory Layout以及mmu配置

    原文:Linux内存管理:ARM Memory Layout以及mmu配置 在内核进行page初始化以及mmu配置之前,首先需要知道整个memory map. 1. ARM Memory Layout ...

  3. 阅读ARM Memory(L1/L2/MMU)笔记

    <ARM Architecture Reference Manual ARMv8-A>里面有Memory层级框架图,从中可以看出L1.L2.DRAM.Disk.MMU之间的关系,以及他们在 ...

  4. [转]Whirlwind Tour of ARM Assembly

    ref:http://www.coranac.com/tonc/text/asm.htm 23.1. Introduction Very broadly speaking, you can divid ...

  5. 附录:ARM 手册 词汇表

    来自:<DDI0406C_C_arm_architecture_reference_manual.pdf>p2723 能够查询到:“RAZ RAO WI 等的意思” RAZ:Read-As ...

  6. Windows And Video Memory

    MSDN Blogs > Zemblanity > Windows And Video Memory   Windows And Video Memory Tom_Mulcahy 11 F ...

  7. Off-heap Memory in Apache Flink and the curious JIT compiler

    https://flink.apache.org/news/2015/09/16/off-heap-memory.html   Running data-intensive code in the J ...

  8. 如何展开Linux Memory Management学习?

    Linux 的进程和内存是两座大山,没有翻过这两座大山,对于内核的理解始终是不完整的。关于 Linux 内存管理,在开始之前做些准备工作。首先 bing 到了 Quora 的<How can one rea ...

  9. ARM架构相关学习归纳总结

    ARM 作为一个生态不仅提供了 CPU Core,还提供了一系列相关的 IP,比如 GIC、MMU、AMBA、CoreLink、CoreSight、Mali 等等。其他还包括 Debug 工具、开发工具、IDE 等 ...

随机推荐

  1. Django用ajax进行post请求

    post请求有两种,跨域和不跨域 1.不跨域 # 不跨域的 view.py def re_json(request): print(request.POST['name']) p1 = Product ...

  2. Servlet、ServletConfig、ServletContext深入学习

    1.Servlet学习 1.Servlet生命周期 Servlet 加载—>实例化—>服务—>销毁. init(servletConfig):(经过自己的测试发现会先调用这个而不是i ...

  3. css初始化minireset.css

    一个很小的现代CSS重置,涵盖了基本内容: 重置字体大小:这样使用语义标记不会影响样式 重置块边距:所以只有在需要时才应用间距 重置表格:这样表格数据只占用它所需的空间 保留了行内间距:因此,按钮和输 ...

  4. ckeditor:基本使用方法

    引用网址:http://blog.sina.com.cn/s/blog_6961ba9b0102wwye.html 1.获得值 var editor=CKEDITOR.replace( 'editor ...

  5. hdu 2065(泰勒展式)

    比赛的时候遇到这种题,只能怪自己高数学得不好,看着别人秒.... 由4种字母组成,A和C只能出现偶数次. 构造指数级生成函数:(1+x/1!+x^2/2!+x^3/3!……)^2*(1+x^2/2!+ ...

  6. NFS基础配置

    需要安装的包: rpc-bind nfs-utils 修改配置文件 /etc/exports 配置 /tmp *(ro) 修改配置之后记得重启服务 sudo systemctl restart nfs ...

  7. Ubuntu 12.04 下 Sublime Text 3 Build 3047 破解

    1. $sudo vim /opt/sublime_text/sublime_text 2. 将文件转成十六进制形式.在 vim 中输入: :%!xxd 3. 查找数字串 “4333 3342 303 ...

  8. spring mvc activemq

    http://websystique.com/spring/spring-4-jms-activemq-example-with-jmslistener-enablejms/

  9. checkbox复选框的一些深入研究与理解

    一.一开始的唠叨最近忙于开发,自淫于项目的一步步完工,心浮躁了.舍近而求远,兵家之大忌.我是不是应该着眼于眼前的东西,好好的静下心来,超过一般人的沉静与沉浸,研究最基本的东西呢?这番思考,让我找到了一 ...

  10. STM32 串口通信使用奇偶校验

    STM32串口通信如果使用奇偶校验,需要设置数据位长度为9bit USART_InitStructure.USART_BaudRate = 9600; USART_InitStructure.USAR ...