; ARM Memory Copy
;
; ARM-mode (CODE32) memory copy / fill / xor routines, IAR assembler syntax.
;
; NOTE(review): arm_memxor, arm_memxor8, arm_memxor16 and arm_memxor32 are
; defined in this module but are not declared PUBLIC here - confirm whether
; they are meant to be callable from other modules.
;
        MODULE      ARM_MEMORY

        PUBLIC      ARM_MEMCPY
        PUBLIC      ARM_MEMSET
        PUBLIC      ARM_MEMSET8
        PUBLIC      ARM_MEMSET16
        PUBLIC      ARM_MEMSET32

        ; NOTE(review): the alignment exponent was missing in the source text;
        ; 2 (= 4-byte alignment, required for ARM-mode code) is assumed.
        SECTION     .text:CODE:NOROOT(2)
        CODE32

;-------------------------------------------------------------------------------
; void ARM_MEMCPY(void* pDest, void* pSrc, U32 NumBytes)
;
; Function description
;   Copy NumBytes bytes from pSrc to pDest, in ascending address order.
;
;   Strategy:
;     1. Fewer than 4 bytes: byte transfers only.
;     2. Copy 1..3 bytes so that the destination becomes word aligned.
;     3. Source also word aligned: bulk LDM/STM in blocks of 32 bytes,
;        then 16/8/4 trailing bytes selected by the bits of NumBytes.
;     4. Source not word aligned: read aligned words and merge two
;        neighbouring words with shifts (the LSR/LSL pairing below matches
;        a little-endian byte order).
;     5. Remaining 0..3 bytes: byte transfers.
;
; Register usage:
;
;   R0    pDest
;   R1    pSrc
;   R2    NumBytes
;
;   R3    Used for data transfers
;   R4    Used for data transfers (saved/restored on the stack)
;   R12   Used for data transfers
;   R14   Used for data transfers (saved/restored on the stack)
;
;   R13   SP
;   R14   LR (contains return address)
;   R15   PC
;
; NOTE(review): the numeric immediates of this routine were missing in the
; source text and were reconstructed from the comments and the data flow.
; Verify against the original file before release.
;
;-------------------------------------------------------------------------------
ARM_MEMCPY:
;-------------------------------------------------------------------------------
        cmp         R2, #+3                             ; R2 = NumBytes
        bls         ARM_MEMCPY_HandleTrailingBytes      ; If we have less than one complete word, use single byte transfer

        ands        R12, R0, #+3                        ; R0 = destination address, R12 = mis-alignment (0..3)
        beq         ARM_MEMCPY_DestIsDWordAligned       ; Is destination address already word aligned ?

;-------------------------------------------------------------------------------
; Handle as many bytes as necessary to align the destination address
;
; R12 = 1 -> 3 bytes (LS and CC true), R12 = 2 -> 2 bytes (LS true),
; R12 = 3 -> 1 byte. None of the instructions between the cmp and the last
; conditional store modifies the flags.
;
        ldrb        R3, [R1], #+1                       ; We need at least one byte to the next word alignment, so we read one.
        cmp         R12, #+2                            ; Set condition codes according to the mis-alignment
        add         R2, R2, R12                         ; Adjust NumBytes : + mis-alignment (1, 2, 3) ...
        ldrbls      R12, [R1], #+1                      ; Lower or same (LS)? -> We need one or two bytes to the next word aligned address
        strb        R3, [R0], #+1
        ldrbcc      R3, [R1], #+1                       ; Carry clear (CC)? -> We need one more byte
        strbls      R12, [R0], #+1
        sub         R2, R2, #+4                         ; ... - 4, i.e. NumBytes -= (4 - mis-alignment)
        strbcc      R3, [R0], #+1                       ; Now the destination address is word aligned

;-------------------------------------------------------------------------------
; Choose best way to transfer data
;
ARM_MEMCPY_DestIsDWordAligned:
        ands        R3, R1, #+3                         ; R3 = mis-alignment of source (0..3)
        beq         ARM_MEMCPY_HandleBulkWordData       ; If source and destination are aligned, use bulk word transfer

        subs        R2, R2, #+4
        bcc         ARM_MEMCPY_HandleTrailingBytes      ; If we have less than one complete word left, use single byte transfer
                                                        ; (bits 1:0 of R2 are unchanged by the subtraction of 4)

        ldr         R12, [R1, -R3]!                     ; Read first mis-aligned data word and word align source address
        cmp         R3, #+2
        beq         ARM_MEMCPY_Loop16BitShift

        bhi         ARM_MEMCPY_Loop24BitShift

;-------------------------------------------------------------------------------
; Handle data in units of word
;
; This is done by reading mis-aligned words from source address and
; shift them into the right alignment. After this the next data word
; will be read to complete the missing data part.
;
ARM_MEMCPY_Loop8BitShift:                               ; Source is 1 byte past a word boundary
        mov         R3, R12, LSR #+8                    ; Shift data word into right position
        ldr         R12, [R1, #+4]!                     ; Load next mis-aligned data word
        subs        R2, R2, #+4                         ; Decrement NumBytes
        orr         R3, R3, R12, LSL #+24               ; Combine missing part of data to build full data word
        str         R3, [R0], #+4                       ; Store complete word
        bcs         ARM_MEMCPY_Loop8BitShift

        add         R1, R1, #+1                         ; Adjust source address
        b           ARM_MEMCPY_HandleTrailingBytes      ; Handle trailing bytes

ARM_MEMCPY_Loop16BitShift:                              ; Source is 2 bytes past a word boundary
        mov         R3, R12, LSR #+16                   ; Shift data word into right position
        ldr         R12, [R1, #+4]!                     ; Load next mis-aligned data word
        subs        R2, R2, #+4                         ; Decrement NumBytes
        orr         R3, R3, R12, LSL #+16               ; Combine missing part of data to build full data word
        str         R3, [R0], #+4                       ; Store complete word
        bcs         ARM_MEMCPY_Loop16BitShift

        add         R1, R1, #+2                         ; Adjust source address
        b           ARM_MEMCPY_HandleTrailingBytes      ; Handle trailing bytes

ARM_MEMCPY_Loop24BitShift:                              ; Source is 3 bytes past a word boundary
        mov         R3, R12, LSR #+24                   ; Shift data word into right position
        ldr         R12, [R1, #+4]!                     ; Load next mis-aligned data word
        subs        R2, R2, #+4                         ; Decrement NumBytes
        orr         R3, R3, R12, LSL #+8                ; Combine missing part of data to build full data word
        str         R3, [R0], #+4                       ; Store complete word
        bcs         ARM_MEMCPY_Loop24BitShift

        add         R1, R1, #+3                         ; Adjust source address
        b           ARM_MEMCPY_HandleTrailingBytes      ; Handle trailing bytes

;-------------------------------------------------------------------------------
; Handle large bulk data in blocks of 8 words (32 bytes)
;
ARM_MEMCPY_HandleBulkWordData:
        subs        R2, R2, #+0x20
        stmdb       SP!, {R4, LR}                       ; Does not modify the flags set by subs
        bcc         ARM_MEMCPY_HandleTrailingWords

ARM_MEMCPY_LoopHandleBulkWord:
        ldm         R1!, {R3, R4, R12, LR}              ; Transfer 16 bytes at once
        stm         R0!, {R3, R4, R12, LR}
        ldm         R1!, {R3, R4, R12, LR}              ; Transfer 16 bytes at once
        stm         R0!, {R3, R4, R12, LR}
        subs        R2, R2, #+0x20
        bcs         ARM_MEMCPY_LoopHandleBulkWord

;-------------------------------------------------------------------------------
; Handle trailing 7 words
;
; R2 is (remaining - 32) here; its bits 4:0 still equal those of the
; remaining byte count.
;
ARM_MEMCPY_HandleTrailingWords:
        movs        R12, R2, LSL #28                    ; Shift NumBytes left to use C and N flag of CPSR to conditional load/store data
        ldmcs       R1!, {R3, R4, R12, LR}              ; C flag contain bit 4 of NumBytes (transfer 16 bytes if it is set)
        stmcs       R0!, {R3, R4, R12, LR}
        ldmmi       R1!, {R3, R4}                       ; N flag contain bit 3 of NumBytes (transfer 8 bytes if it is set)
        stmmi       R0!, {R3, R4}

        movs        R12, R2, LSL #+30                   ; Shift NumBytes left to use C and N flag of CPSR to conditional load/store data
        ldmia       SP!, {R4, LR}                       ; Restore saved registers (flags unchanged)
        ldrcs       R3, [R1], #+4                       ; C flag contain bit 2 of NumBytes (transfer 4 bytes if it is set)
        strcs       R3, [R0], #+4
        bxeq        LR                                  ; Z flag set: bits 1:0 of NumBytes are zero, no trailing bytes

;-------------------------------------------------------------------------------
; Handle trailing 3 bytes
;
; N Z C V Q ***** I F T M4 3 2 1 0
; N = bit[31]
; C = last shift bit : shift
; C = 1 ADD/CMN has carry bit
; C = 0 SUB/CMP no borrow bit
; xxxxxxxxxxxxxxxxxxxx10 << 31 : N=0, C=1
; xxxxxxxxxxxxxxxxxxxx01 << 31 : N=1, C=0
; BMI : N=1
; BCS : C=1
ARM_MEMCPY_HandleTrailingBytes:
        movs        R2, R2, LSL #+31                    ; Shift NumBytes left to use C and N flag of CPSR to conditional load/store data
        ldrbmi      R2, [R1], #+1                       ; N flag contain bit 0 of NumBytes (transfer 1 byte if it is set)
        ldrbcs      R3, [R1], #+1                       ; C flag contain bit 1 of NumBytes (transfer 2 bytes if it is set)
        ldrbcs      R12, [R1], #+1
        strbmi      R2, [R0], #+1
        strbcs      R3, [R0], #+1
        strbcs      R12, [R0], #+1
        bx          LR

;-------------------------------------------------------------------------------
; void ARM_MEMSET(void* pDest, U32 c, U32 NumBytes)
;
; Function description
;   Fill NumBytes bytes starting at pDest with the byte value c.
;
;   c is replicated into all four bytes of R1 with two ORRs and is NOT
;   masked first, so the caller must pass a value in the range 0..255.
;
; Register usage:
;
;   R0    pDest
;   R1    c (replicated to 32 bits on entry)
;   R2    NumBytes
;
;   R3    Used for data transfers
;   R4    Used for data transfers (saved/restored on the stack)
;   R5    Used for data transfers (saved/restored on the stack)
;   R6    Scratch for flag generation (saved/restored on the stack)
;
;   R13   SP
;   R14   LR (contains return address)
;   R15   PC
;
; NOTE(review): the numeric immediates of this routine were missing in the
; source text and were reconstructed from the comments and the data flow.
; Verify against the original file before release.
;
;-------------------------------------------------------------------------------
ARM_MEMSET:
;-------------------------------------------------------------------------------
        orr         R1, R1, R1, LSL #+8                 ; 0x000000cc -> 0x0000cccc
        orr         R1, R1, R1, LSL #+16                ; 0x0000cccc -> 0xcccccccc

        cmp         R2, #+3                             ; R2 = NumBytes
        bls         ARM_MEMSET_HandleTrailingBytes      ; If we have less than one complete word, use single byte transfer

        ands        R3, R0, #+3                         ; R0 = destination address, R3 = mis-alignment (0..3)
        beq         ARM_MEMSET_DestIsAligned            ; Is destination address already word aligned ?

;
; Handle as many bytes as necessary to align the destination address
; (R3 = 1 -> 3 bytes, R3 = 2 -> 2 bytes, R3 = 3 -> 1 byte)
;
        strb        R1, [R0], #+1                       ; We need at least one byte to the next word alignment, so we write one.
        cmp         R3, #+2                             ; Set condition codes according to the mis-alignment
        add         R2, R2, R3                          ; Adjust NumBytes : + mis-alignment ...
        strbls      R1, [R0], #+1                       ; Lower or same (LS)? -> We need one or two bytes to the next word aligned address
        sub         R2, R2, #+4                         ; ... - 4, i.e. NumBytes -= (4 - mis-alignment)
        strbcc      R1, [R0], #+1                       ; Carry clear (CC)? -> We need one more byte

;
; Destination is aligned, use bulk word transfer
;
ARM_MEMSET_DestIsAligned:

;
; Handle large bulk data in blocks of 8 words (32 bytes)
;
ARM_MEMSET_HandleBulkWordData:
        stmdb       SP!, {R4, R5, R6}

        mov         R3, R1                              ; R1, R3, R4, R5 all hold the fill pattern:
        mov         R4, R1                              ; one STM of these registers transfers 16 bytes at once
        mov         R5, R1

        subs        R2, R2, #+0x20                      ; 32 Bytes = 8 DWords
        bcc         ARM_MEMSET_HandleTrailingWords

ARM_MEMSET_LoopHandleBulkWord:
        stm         R0!, {R1, R3, R4, R5}
        stm         R0!, {R1, R3, R4, R5}
        subs        R2, R2, #+0x20
        bcs         ARM_MEMSET_LoopHandleBulkWord

;
; Handle trailing 7 words
; (R2 is (remaining - 32) here; bits 4:0 still equal those of the remaining count)
;
ARM_MEMSET_HandleTrailingWords:
        movs        R6, R2, LSL #28                     ; Shift NumBytes left to use C and N flag of CPSR to conditional load/store data
        stmcs       R0!, {R1, R3, R4, R5}               ; C flag contain bit 4 of NumBytes (transfer 16 bytes if it is set)
        stmmi       R0!, {R1, R3}                       ; N flag contain bit 3 of NumBytes (transfer 8 bytes if it is set)

        movs        R6, R2, LSL #+30                    ; Shift NumBytes left to use C and N flag of CPSR to conditional load/store data
        strcs       R1, [R0], #+4                       ; C flag contain bit 2 of NumBytes (transfer 4 bytes if it is set)

        ldmia       SP!, {R4, R5, R6}                   ; Restore saved registers (flags unchanged)
        bxeq        LR                                  ; Z flag set: no trailing bytes

;
; Handle trailing 3 bytes
;
ARM_MEMSET_HandleTrailingBytes:
        movs        R2, R2, LSL #+31                    ; Shift NumBytes left to use C and N flag of CPSR to conditional load/store data
        strbmi      R1, [R0], #+1                       ; N flag contain bit 0 of NumBytes (transfer 1 byte if it is set)
        strbcs      R1, [R0], #+1                       ; C flag contain bit 1 of NumBytes (transfer 2 bytes if it is set)
        strbcs      R1, [R0], #+1
        bx          LR

; int ARM_MEMSET8(void* pDest, U32 c, U32 NumBytes);
;-------------------------------------------------------------------------------
; ARM_MEMSET8
;
; Fill NumBytes bytes at pDest with the byte value c; pDest may have any
; alignment.
;
;   R0    pDest
;   R1    c (widened to 16 and then 32 bits by ORR; not masked, so the caller
;         must pass a value in 0..255)
;   R2    NumBytes (compared as a signed value)
;   R3    Copy of the fill pattern
;   R4,R5 Copies of the fill pattern (saved/restored on the stack)
;
; No return value is loaded; on exit R0 holds the advanced destination pointer.
;
; NOTE(review): the numeric immediates of this routine were missing in the
; source text and were reconstructed from the comments and the transfer sizes.
; Verify against the original file before release.
;-------------------------------------------------------------------------------
ARM_MEMSET8:
;-------------------------------------------------------------------------------
        stmdb       SP!, {R4, R5}
        cmp         R2, #4
        blt         ARM_MEMSET8_loop3                   ; 3 or fewer bytes: byte stores only

        ; Alignment is unknown
        tst         R0, #1
        strneb      R1, [R0], #1
        subne       R2, R2, #1

        ; Now we are 16-bit aligned (need to upgrade 'c' to 16-bit)
        orr         R1, R1, R1, LSL #8
        tst         R0, #2
        strneh      R1, [R0], #2
        subne       R2, R2, #2

        ; Now we are 32-bit aligned (need to upgrade 'c' to 32-bit)
        orr         R1, R1, R1, LSL #16
        mov         R3, R1
        cmp         R2, #16
        blt         ARM_MEMSET8_loop2                   ; Less than one 16-byte block left

        tst         R0, #4
        strne       R1, [R0], #4
        subne       R2, R2, #4
        tst         R0, #8
        stmneia     R0!, {R1, R3}
        subne       R2, R2, #8

        ; Now we are 128-bit aligned
        mov         R4, R1
        mov         R5, R1

ARM_MEMSET8_loop1:
        ; Copy 4 32-bit values per loop iteration
        subs        R2, R2, #16
        stmgeia     R0!, {R1, R3, R4, R5}
        bge         ARM_MEMSET8_loop1
        add         R2, R2, #16                         ; Undo the last (failed) subtraction

ARM_MEMSET8_loop2:
        ; Copy up to 3 remaining 32-bit values
        tst         R2, #8
        stmneia     R0!, {R1, R3}
        tst         R2, #4
        strne       R1, [R0], #4
        and         R2, R2, #3

ARM_MEMSET8_loop3:
        ; Copy up to 3 remaining bytes
        subs        R2, R2, #1
        strgeb      R1, [R0], #1
        subs        R2, R2, #1
        strgeb      R1, [R0], #1
        subs        R2, R2, #1
        strgeb      R1, [R0], #1
        ldmia       SP!, {R4, R5}
        bx          LR

; int ARM_MEMSET16(void* pDest, U32 c, U32 NumHalfWords);
;-------------------------------------------------------------------------------
; ARM_MEMSET16
;
; Fill NumHalfWords 16-bit items at pDest with the 16-bit value c.
; pDest is expected to be at least 16-bit aligned.
;
;   R0    pDest
;   R1    c (widened to 32 bits by ORR; not masked, so the caller must pass
;         a value in 0..0xFFFF)
;   R2    NumHalfWords (compared as a signed value)
;   R3    Copy of the fill pattern
;   R4,R5 Copies of the fill pattern (saved/restored on the stack)
;
; No return value is loaded; on exit R0 holds the advanced destination pointer.
;
; NOTE(review): the numeric immediates of this routine were missing in the
; source text and were reconstructed from the comments and the transfer sizes.
; Verify against the original file before release.
;-------------------------------------------------------------------------------
ARM_MEMSET16:
;-------------------------------------------------------------------------------
        stmdb       SP!, {R4, R5}

        cmp         R2, #2
        blt         ARM_MEMSET16_HandleTrailingHalfWord ; 1 or 0

        ; Alignment is known to be at least 16-bit
        tst         R0, #2
        strneh      R1, [R0], #2                        ; xxxx-xx10 --->
        subne       R2, R2, #1                          ; xxxx-xx00

        ; Now we are 32-bit aligned (need to upgrade 'c' to 32-bit )
        orr         R1, R1, R1, LSL #16
        mov         R4, R1

        cmp         R2, #8
        blt         ARM_MEMSET16_HandleTrailingWords    ; 7, 6, ... 0

        tst         R0, #4
        strne       R1, [R0], #4                        ; xxxx-x100 --->
        subne       R2, R2, #2                          ; xxxx-x000 --->

        ; Now we are 64-bit aligned
        tst         R0, #8
        stmneia     R0!, {R1, R4}                       ; xxxx-1000 --->
        subne       R2, R2, #4                          ; xxxx-0000 --->

ARM_MEMSET16_HandleBulkWordData:
        ; Now we are 128-bit aligned
        mov         R5, R1
        mov         R3, R1

ARM_MEMSET16_LoopHandleBulkWord:
        ; Copy 4 32-bit values (8 halfwords) per loop iteration
        subs        R2, R2, #8
        stmgeia     R0!, {R1, R3, R4, R5}
        bge         ARM_MEMSET16_LoopHandleBulkWord
        add         R2, R2, #8                          ; Undo the last (failed) subtraction

ARM_MEMSET16_HandleTrailingWords:
        ; Copy up to 3 remaining 32-bit values
        tst         R2, #4                              ; 4 halfwords = 2 words
        stmneia     R0!, {R1, R4}

        tst         R2, #2                              ; 2 halfwords = 1 word
        strne       R1, [R0], #4

        and         R2, R2, #1

ARM_MEMSET16_HandleTrailingHalfWord:
        ; Copy up to 1 remaining 16-bit value
        subs        R2, R2, #1
        strgeh      R1, [R0], #2

        ldmia       SP!, {R4, R5}
        bx          LR

; int ARM_MEMSET32(void* pDest, U32 c, U32 NumWords);
;-------------------------------------------------------------------------------
; ARM_MEMSET32
;
; Fill NumWords 32-bit items at pDest with the 32-bit value c.
; pDest is expected to be at least 32-bit aligned.
;
;   R0    pDest
;   R1    c
;   R2    NumWords (compared as a signed value)
;   R3    Copy of the fill pattern
;   R4,R5 Copies of the fill pattern (saved/restored on the stack)
;
; No return value is loaded; on exit R0 holds the advanced destination pointer.
;
; NOTE(review): the numeric immediates of this routine were missing in the
; source text and were reconstructed from the comments and the transfer sizes.
; Verify against the original file before release.
;-------------------------------------------------------------------------------
ARM_MEMSET32:
;-------------------------------------------------------------------------------
        stmdb       SP!, {R4, R5}

        cmp         R2, #4
        blt         ARM_MEMSET32_loop2                  ; 3 or fewer words: single word stores only

        ; Alignment is known to be at least 32-bit
        mov         R3, R1

        tst         R0, #4
        strne       R1, [R0], #4
        subne       R2, R2, #1

        ; Now we are 64-bit aligned
        tst         R0, #8
        stmneia     R0!, {R1, R3}
        subne       R2, R2, #2

        ; Now we are 128-bit aligned
        mov         R4, R1
        mov         R5, R1

ARM_MEMSET32_loop1:
        ; Copy 4 32-bit values per loop iteration
        subs        R2, R2, #4
        stmgeia     R0!, {R1, R3, R4, R5}
        bge         ARM_MEMSET32_loop1
        add         R2, R2, #4                          ; Undo the last (failed) subtraction

ARM_MEMSET32_loop2:
        ; Copy up to 3 remaining 32-bit values
        subs        R2, R2, #1
        strge       R1, [R0], #4
        subs        R2, R2, #1
        strge       R1, [R0], #4
        subs        R2, R2, #1
        strge       R1, [R0], #4

        ldmia       SP!, {R4, R5}
        bx          LR

; __arm void ARM_memxor(void* pDest, U32 c, U32 NumBytes);
;   R0 = pDest, R1 = c, R2 = NumBytes
;
; XOR every one of the NumBytes bytes at pDest with the byte value c, in place.
;
;   R0     pDest
;   R1     c (replicated to 32 bits on entry; not masked, so the caller must
;          pass a value in 0..255)
;   R2     NumBytes
;   R3,R12 Used for data transfers
;   R4-R6  Used for data transfers (saved/restored on the stack)
;   R7     Scratch for flag generation (saved/restored on the stack)
;
; NOTE(review): the numeric immediates of this routine were missing in the
; source text and were reconstructed from the comments and the data flow.
; In particular the alignment loads below carried a post-index whose value is
; unknown; they are written without write-back, because the following store
; already advances R0 and any other value would store to the wrong address.
; Verify against the original file before release.
;-------------------------------------------------------------------------------
arm_memxor:
;-------------------------------------------------------------------------------
        orr         R1, R1, R1, LSL #+8                 ; 0x000000cc -> 0x0000cccc
        orr         R1, R1, R1, LSL #+16                ; 0x0000cccc -> 0xcccccccc

        cmp         R2, #+3                             ; R2 = NumBytes
        bls         arm_memxor_HandleTrailingBytes      ; If we have less than one complete word, use single byte transfer

        ands        R3, R0, #+3                         ; R0 = destination address, R3 = mis-alignment (0..3)
        beq         arm_memxor_DestIsAligned            ; Is destination address already word aligned ?

;
; Handle as many bytes as necessary to align the destination address
; (R3 = 1 -> 3 bytes, R3 = 2 -> 2 bytes, R3 = 3 -> 1 byte)
;
        ldrb        R12, [R0]                           ; We need at least one byte to the next word alignment
        eor         R12, R12, R1
        strb        R12, [R0], #+1

        cmp         R3, #+2                             ; Set condition codes according to the mis-alignment
        add         R2, R2, R3                          ; Adjust NumBytes : + mis-alignment ... (R3 is reused as data register below)

        ldrbls      R3, [R0]                            ; Lower or same (LS)? -> We need one or two bytes to the next word aligned address
        eorls       R3, R3, R1
        strbls      R3, [R0], #+1

        sub         R2, R2, #+4                         ; ... - 4, i.e. NumBytes -= (4 - mis-alignment)

        ldrbcc      R3, [R0]                            ; Carry clear (CC)? -> We need one more byte
        eorcc       R3, R3, R1
        strbcc      R3, [R0], #+1

;
; Destination is aligned, use bulk word transfer
;
arm_memxor_DestIsAligned:

;
; Handle large bulk data in blocks of 8 words (32 bytes)
;
arm_memxor_HandleBulkWordData:
        stmdb       SP!, {R4, R5, R6, R7}

        subs        R2, R2, #+0x20                      ; 32 Bytes = 8 DWords
        bcc         arm_memxor_HandleTrailingWords

arm_memxor_LoopHandleBulkWord:
        ldm         R0, {R3, R4, R5, R6}                ; Read 16 bytes (no write-back) ...
        eor         R3, R3, R1
        eor         R4, R4, R1
        eor         R5, R5, R1
        eor         R6, R6, R1
        stm         R0!, {R3, R4, R5, R6}               ; ... and write them back, advancing R0

        ldm         R0, {R3, R4, R5, R6}
        eor         R3, R3, R1
        eor         R4, R4, R1
        eor         R5, R5, R1
        eor         R6, R6, R1
        stm         R0!, {R3, R4, R5, R6}

        subs        R2, R2, #+0x20
        bcs         arm_memxor_LoopHandleBulkWord

;
; Handle trailing 7 words
; (R2 is (remaining - 32) here; bits 4:0 still equal those of the remaining count)
;
arm_memxor_HandleTrailingWords:
        movs        R7, R2, LSL #28                     ; Shift NumBytes left to use C and N flag of CPSR to conditional load/store data

        ldmcs       R0, {R3, R4, R5, R6}                ; C flag contain bit 4 of NumBytes (process 16 bytes if it is set)
        eorcs       R3, R3, R1
        eorcs       R4, R4, R1
        eorcs       R5, R5, R1
        eorcs       R6, R6, R1
        stmcs       R0!, {R3, R4, R5, R6}

        ldmmi       R0, {R3, R4}                        ; N flag contain bit 3 of NumBytes (process 8 bytes if it is set)
        eormi       R3, R3, R1
        eormi       R4, R4, R1
        stmmi       R0!, {R3, R4}

        movs        R7, R2, LSL #+30                    ; Shift NumBytes left to use C and N flag of CPSR to conditional load/store data

        ldrcs       R3, [R0]                            ; C flag contain bit 2 of NumBytes (process 4 bytes if it is set)
        eorcs       R3, R3, R1
        strcs       R3, [R0], #+4

        ldmia       SP!, {R4, R5, R6, R7}               ; Restore saved registers (flags unchanged)
        bxeq        LR                                  ; Z flag set: no trailing bytes

;
; Handle trailing 3 bytes
;
; Byte loads (ldrb) are used here: R0 is not necessarily word aligned on this
; path and a word load would also read beyond the end of the buffer.
;
arm_memxor_HandleTrailingBytes:
        movs        R2, R2, LSL #+31                    ; Shift NumBytes left to use C and N flag of CPSR to conditional load/store data

        ldrbmi      R2, [R0]                            ; N flag contain bit 0 of NumBytes (process 1 byte if it is set)
        eormi       R2, R2, R1
        strbmi      R2, [R0], #+1

        ldrbcs      R2, [R0]                            ; C flag contain bit 1 of NumBytes (process 2 bytes if it is set)
        eorcs       R2, R2, R1
        strbcs      R2, [R0], #+1

        ldrbcs      R2, [R0]
        eorcs       R2, R2, R1
        strbcs      R2, [R0], #+1

        bx          LR

; __arm int arm_memxor8(void* pDest, U32 c, U32 NumBytes);
;   R0 = pDest, R1 = c, R2 = NumBytes
;
; XOR every one of the NumBytes bytes at pDest with the byte value c, in
; place; pDest may have any alignment.
;
;   R0     pDest
;   R1     c (replicated to 32 bits on entry; not masked, so the caller must
;          pass a value in 0..255)
;   R2     NumBytes (compared as a signed value)
;   R3     Used for data transfers
;   R4-R6  Used for data transfers (saved/restored on the stack)
;
; No return value is loaded; on exit R0 holds the advanced destination pointer.
;
; NOTE(review): the numeric immediates of this routine were missing in the
; source text and were reconstructed from the comments and the transfer sizes.
; Verify against the original file before release.
;-------------------------------------------------------------------------------
arm_memxor8:
;-------------------------------------------------------------------------------
        stmdb       SP!, {R4, R5, R6}

        orr         R1, R1, R1, LSL #+8                 ; 0x000000cc -> 0x0000cccc
        orr         R1, R1, R1, LSL #+16                ; 0x0000cccc -> 0xcccccccc

        cmp         R2, #4
        blt         arm_memxor8_loop3                   ; 3 or fewer bytes: byte accesses only

        ; Alignment is unknown
        tst         R0, #1
        ldrneb      R6, [R0]
        eorne       R6, R6, R1
        strneb      R6, [R0], #1
        subne       R2, R2, #1

        ; Now we are 16-bit aligned
        tst         R0, #2
        ldrneh      R6, [R0]
        eorne       R6, R6, R1
        strneh      R6, [R0], #2
        subne       R2, R2, #2

        ; Now we are 32-bit aligned
        cmp         R2, #16
        blt         arm_memxor8_loop2                   ; Less than one 16-byte block left

        tst         R0, #4
        ldrne       R6, [R0]
        eorne       R6, R6, R1
        strne       R6, [R0], #4
        subne       R2, R2, #4

        ; Now we are 64-bit aligned
        tst         R0, #8
        ldmneia     R0, {R3, R6}
        eorne       R3, R3, R1
        eorne       R6, R6, R1
        stmneia     R0!, {R3, R6}
        subne       R2, R2, #8

        ; Now we are 128-bit aligned

arm_memxor8_loop1:
        ; Process 4 32-bit values per loop iteration
        subs        R2, R2, #16
        ldmgeia     R0, {R3, R4, R5, R6}
        eorge       R3, R3, R1
        eorge       R4, R4, R1
        eorge       R5, R5, R1
        eorge       R6, R6, R1
        stmgeia     R0!, {R3, R4, R5, R6}
        bge         arm_memxor8_loop1
        add         R2, R2, #16                         ; Undo the last (failed) subtraction

arm_memxor8_loop2:
        ; Process up to 3 remaining 32-bit values
        tst         R2, #8
        ldmneia     R0, {R3, R4}
        eorne       R3, R3, R1
        eorne       R4, R4, R1
        stmneia     R0!, {R3, R4}

        tst         R2, #4
        ldrne       R3, [R0]
        eorne       R3, R3, R1
        strne       R3, [R0], #4

        and         R2, R2, #3

arm_memxor8_loop3:
        ; Process up to 3 remaining bytes.
        ; Each store writes R3 (the XOR result), never R1 (the pattern).
        subs        R2, R2, #1
        ldrgeb      R3, [R0]
        eorge       R3, R3, R1
        strgeb      R3, [R0], #1

        subs        R2, R2, #1
        ldrgeb      R3, [R0]
        eorge       R3, R3, R1
        strgeb      R3, [R0], #1

        subs        R2, R2, #1
        ldrgeb      R3, [R0]
        eorge       R3, R3, R1
        strgeb      R3, [R0], #1

        ldmia       SP!, {R4, R5, R6}
        bx          LR

; __arm int arm_memxor16(void* pDest, U32 c, U32 NumHalfWords);
;   R0 = pDest, R1 = c, R2 = NumHalfWords
;
; XOR every one of the NumHalfWords 16-bit items at pDest with the 16-bit
; value c, in place. pDest is expected to be at least 16-bit aligned.
;
;   R0     pDest
;   R1     c (replicated to 32 bits on entry; not masked, so the caller must
;          pass a value in 0..0xFFFF)
;   R2     NumHalfWords (compared as a signed value)
;   R3     Used for data transfers
;   R4-R6  Used for data transfers (saved/restored on the stack)
;
; No return value is loaded; on exit R0 holds the advanced destination pointer.
;
; NOTE(review): the numeric immediates of this routine were missing in the
; source text and were reconstructed from the comments and the transfer sizes.
; Verify against the original file before release.
;-------------------------------------------------------------------------------
arm_memxor16:
;-------------------------------------------------------------------------------
        stmdb       SP!, {R4, R5, R6}
        orr         R1, R1, R1, LSL #+16                ; 0x0000cccc -> 0xcccccccc

        cmp         R2, #2
        blt         arm_memxor16_HandleTrailingHalfWord ; 1 or 0

        ; Alignment is known to be at least 16-bit
        tst         R0, #2
        ldrneh      R6, [R0]
        eorne       R6, R6, R1
        strneh      R6, [R0], #2                        ; xxxx-xx10 --->
        subne       R2, R2, #1                          ; xxxx-xx00

        ; Now we are 32-bit aligned
        cmp         R2, #8
        blt         arm_memxor16_HandleTrailingWords    ; 7, 6, ... 0

        tst         R0, #4
        ldrne       R3, [R0]
        eorne       R3, R3, R1
        strne       R3, [R0], #4                        ; xxxx-x100 --->
        subne       R2, R2, #2                          ; xxxx-x000 --->

        ; Now we are 64-bit aligned
        tst         R0, #8
        ldmneia     R0, {R3, R4}
        eorne       R3, R3, R1
        eorne       R4, R4, R1
        stmneia     R0!, {R3, R4}                       ; xxxx-1000 --->
        subne       R2, R2, #4                          ; xxxx-0000 --->

arm_memxor16_HandleBulkWordData:
        ; Now we are 128-bit aligned

arm_memxor16_LoopHandleBulkWord:
        ; Process 4 32-bit values (8 halfwords) per loop iteration
        subs        R2, R2, #8
        ldmgeia     R0, {R3, R4, R5, R6}
        eorge       R3, R3, R1
        eorge       R4, R4, R1
        eorge       R5, R5, R1
        eorge       R6, R6, R1
        stmgeia     R0!, {R3, R4, R5, R6}
        bge         arm_memxor16_LoopHandleBulkWord
        add         R2, R2, #8                          ; Undo the last (failed) subtraction

arm_memxor16_HandleTrailingWords:
        ; Process up to 3 remaining 32-bit values
        tst         R2, #4                              ; 4 halfwords = 2 words
        ldmneia     R0, {R3, R4}
        eorne       R3, R3, R1
        eorne       R4, R4, R1
        stmneia     R0!, {R3, R4}

        tst         R2, #2                              ; 2 halfwords = 1 word
        ldrne       R3, [R0]
        eorne       R3, R3, R1
        strne       R3, [R0], #4

        and         R2, R2, #1

arm_memxor16_HandleTrailingHalfWord:
        ; Process up to 1 remaining 16-bit value
        subs        R2, R2, #1
        ldrgeh      R3, [R0]
        eorge       R3, R3, R1
        strgeh      R3, [R0], #2

        ldmia       SP!, {R4, R5, R6}
        bx          LR

; __arm int arm_memxor32(void* pDest, U32 c, U32 NumWords);
;   R0 = pDest, R1 = c, R2 = NumWords
;
; XOR every one of the NumWords 32-bit items at pDest with the 32-bit value c,
; in place. pDest is expected to be at least 32-bit aligned.
;
;   R0     pDest
;   R1     c
;   R2     NumWords (compared as a signed value)
;   R3     Used for data transfers
;   R4-R6  Used for data transfers (saved/restored on the stack)
;
; No return value is loaded; on exit R0 holds the advanced destination pointer.
;
; NOTE(review): the numeric immediates of this routine were missing in the
; source text and were reconstructed from the comments and the transfer sizes.
; Verify against the original file before release.
;-------------------------------------------------------------------------------
arm_memxor32:
;-------------------------------------------------------------------------------
        stmdb       SP!, {R4, R5, R6}

        cmp         R2, #4
        blt         arm_memxor32_loop2                  ; 3 or fewer words: single word accesses only

        ; Alignment is known to be at least 32-bit, is it 64-bit aligned ?
        tst         R0, #4
        ; No, it is 32-bit aligned
        ldrne       R3, [R0]
        eorne       R3, R3, R1
        strne       R3, [R0], #4
        subne       R2, R2, #1

        ; Now we are 64-bit aligned, is it 128-bit aligned ?
        tst         R0, #8
        ; No, it is 64-bit aligned
        ldmneia     R0, {R3, R4}
        eorne       R3, R3, R1
        eorne       R4, R4, R1
        stmneia     R0!, {R3, R4}                       ; xxxx-1000 --->
        subne       R2, R2, #2

        ; Now we are 128-bit aligned

arm_memxor32_loop1:
        ; Process 4 32-bit values per loop iteration
        subs        R2, R2, #4
        ldmgeia     R0, {R3, R4, R5, R6}
        eorge       R3, R3, R1
        eorge       R4, R4, R1
        eorge       R5, R5, R1
        eorge       R6, R6, R1
        stmgeia     R0!, {R3, R4, R5, R6}
        bge         arm_memxor32_loop1
        add         R2, R2, #4                          ; Undo the last (failed) subtraction

arm_memxor32_loop2:
        ; Process up to 3 remaining 32-bit values
        subs        R2, R2, #1
        ldrge       R3, [R0]
        eorge       R3, R3, R1
        strge       R3, [R0], #4

        subs        R2, R2, #1
        ldrge       R3, [R0]
        eorge       R3, R3, R1
        strge       R3, [R0], #4

        subs        R2, R2, #1
        ldrge       R3, [R0]
        eorge       R3, R3, R1
        strge       R3, [R0], #4

        ldmia       SP!, {R4, R5, R6}
        bx          LR

        END
ARM Memory Copy的更多相关文章
- Android笔记:C memory copy
socket通讯问题之一: 在c中按字节发送数据 比如设备1状态(1字节)值(1字节)设备2状态(1字节)值(1字节)....这种格式拆分的问题 在c中可以利用struct的 memory copy ...
- 【ARM-Linux开发】Linux内存管理:ARM Memory Layout以及mmu配置
原文:Linux内存管理:ARM Memory Layout以及mmu配置 在内核进行page初始化以及mmu配置之前,首先需要知道整个memory map. 1. ARM Memory Layout ...
- 阅读ARM Memory(L1/L2/MMU)笔记
<ARM Architecture Reference Manual ARMv8-A>里面有Memory层级框架图,从中可以看出L1.L2.DRAM.Disk.MMU之间的关系,以及他们在 ...
- [转]Whirlwind Tour of ARM Assembly
ref:http://www.coranac.com/tonc/text/asm.htm 23.1. Introduction Very broadly speaking, you can divid ...
- 附录:ARM 手册 词汇表
来自:<DDI0406C_C_arm_architecture_reference_manual.pdf>p2723 能够查询到:“RAZ RAO WI 等的意思” RAZ:Read-As ...
- Windows And Video Memory
MSDN Blogs > Zemblanity > Windows And Video Memory Windows And Video Memory Tom_Mulcahy 11 F ...
- Off-heap Memory in Apache Flink and the curious JIT compiler
https://flink.apache.org/news/2015/09/16/off-heap-memory.html Running data-intensive code in the J ...
- 如何展开Linux Memory Management学习?
Linux的进程和内存是两座大山,没有翻过这两座大山对于内核的理解始终是不完整的. 关于Linux内存管理,在开始之前做些准备工作. 首先bing到了Quora的<How can one rea ...
- ARM架构相关学习归纳总结
ARM作为一个生态不仅提供了CPU Core,还提供了一系列相关的IP,比如GIC.MMU.AMBA.CoreLink.CoreSight.Mali等等. 其他还包括Debug工具.开发工具.IDE等 ...
随机推荐
- python导出数据到excel
1,SMTP发送带excel附件的邮件: def sendMail(filename, addressee): """ :param content: 发送内容 :par ...
- 2018-11-3& maven
https://www.cnblogs.com/clsn/p/7944116.html#auto_id_10 http://www.runoob.com/maven/maven-creating-pr ...
- memcache 键名的命名规则以及和memcached的区别
2014年3月27日 07:47:46 Keys---- Data stored by memcached is identified with the help of a key. A keyis ...
- 08 Go 1.8 Release Notes
Go 1.8 Release Notes Introduction to Go 1.8 Changes to the language Ports Known Issues Tools Assembl ...
- PhpStorm,Pycharm,Goland破解
phpstorm是一个轻量级且便捷的PHP IDE,其旨在提供用户效率,可深刻理解用户的编码,提供智能代码补全,快速导航以及即时错误检查.不但是php开发的利器,前端开发也是毫不逊色的.下面记录Php ...
- EntityFramework系列:SQLite的CodeFrist和RowVersion
没什么好说的,能支持DropCreateDatabaseIfModelChanges和RowVersion的Sqlite谁都想要.EntityFramework7正在添加对Sqlite的支持,虽然EF ...
- 【LOJ】#2122. 「HEOI2015」小 Z 的房间
题解 又是一道取模不给质数的毒瘤矩阵树题 不会写分数类--然后发现了网上过于神仙的题解类似与辗转相除的这样把某一个位置消成0 orz 代码 #include <bits/stdc++.h> ...
- redis 使用管道提升写入的性能[pipeline]
看了手册的都知道multi这个命令的作用就好比是mysql的事务的功能,但是大家都知道事务吗,就是在操作的过程中,把整个操作当作一个原子来处理,避免由于中途出错而导致最后产生的数据不一致,而产生BUG ...
- 归并排序 递归and非递归
什么是归并排序 归并排序其实就做两件事: “分解”——将序列每次折半划分. “合并”——将划分后的序列段两两合并后排序. 首先我们来看一下分解是怎样实现的呢? // 递归退出条件,及left ...
- java数据结构之树
树定义和基本术语定义树(Tree)是n(n≥0)个结点的有限集T,并且当n>0时满足下列条件: (1)有且仅有一个特定的称为根(Root)的结点: (2)当n>1时,其余结 ...