# -----------------------------------------------------------------------
# uint32_t inet_addr_asm(const char *s)
#
# Converts a dotted-quad IPv4 string ("a.b.c.d") to a 32-bit value with
# the FIRST octet in bits 0..7 and the LAST octet in bits 24..31 (the
# layout of a network-byte-order address held in a little-endian
# register).
#
# Syntax: GNU as, AT&T operand order.
# In:     %rdi = s; the argument register matches the System V AMD64
#         convention.  The buffer must be readable for 16 bytes and
#         zero-padded after the text (at most 15 characters).
# Out:    %rax = address (upper 32 bits are zero for well-formed input).
# Clobb:  %rcx, %rdx, %rsi, %r8, %r9, %r10, %r11, flags.  %rdi is kept.
#
# No validation is performed: octets above 255, stray characters or a
# wrong number of fields give an unspecified result.
#
# How it works
#   The string is consumed BACKWARDS, one character at a time, from the
#   top byte of %rsi (shlq $8 moves the next earlier character up).
#   Bytes 8..14 are processed first, then bytes 0..7 are loaded.
#   Any character below '0' is treated as the '.' separator.
#
# Label scheme
#   slrX_nlrY : bytes 8..14 still in %rsi; octet X, decimal digit Y.
#   nlrX_Y    : bytes 0..7 in %rsi;        octet X, decimal digit Y.
#   X counts octets from the END of the string (0 = last octet,
#   3 = first octet); Y is 0 = units, 1 = tens, 2 = hundreds.
#   Octet X is accumulated into bits (24 - 8*X) .. (31 - 8*X) of %rcx,
#   hence the shift counts 32 / 40 / 48 / 56 applied to a top-byte value.
# -----------------------------------------------------------------------
.globl inet_addr_asm
inet_addr_asm:
    movq    $0xff00000000000000, %r8    # r8  = mask for the top byte
    movq    $0x3000000000000000, %r9    # r9  = ASCII '0' placed in the top byte
    movq    $10, %r10                   # r10 = weight of the tens digit
    movq    $100, %r11                  # r11 = weight of the hundreds digit
    movq    8(%rdi), %rsi               # rsi = bytes 8..15 (byte 15 in the top byte)
    shlq    $8, %rsi                    # drop byte 15 (padding); top byte = byte 14

    # Find the last character: test bytes 14, 13, ..., 8 in turn.
    # addq doubles the isolated byte; ZF is set only when it is zero
    # (input bytes are ASCII, i.e. below 0x80).
    movq    %rsi, %rcx
    andq    %r8, %rcx
    addq    %rcx, %rcx
    jnz     found_nonzero               # byte 14 present
    shlq    $8, %rsi
    movq    %rsi, %rcx
    andq    %r8, %rcx
    addq    %rcx, %rcx
    jnz     found_nonzero               # byte 13 present
    shlq    $8, %rsi
    movq    %rsi, %rcx
    andq    %r8, %rcx
    addq    %rcx, %rcx
    jnz     found_nonzero               # byte 12 present
    shlq    $8, %rsi
    movq    %rsi, %rcx
    andq    %r8, %rcx
    addq    %rcx, %rcx
    jnz     found_nonzero               # byte 11 present
    shlq    $8, %rsi
    movq    %rsi, %rcx
    andq    %r8, %rcx
    addq    %rcx, %rcx
    jnz     found_nonzero               # byte 10 present
    shlq    $8, %rsi
    movq    %rsi, %rcx
    andq    %r8, %rcx
    addq    %rcx, %rcx
    jnz     found_nonzero               # byte 9 present
    shlq    $8, %rsi
    movq    %rsi, %rcx
    andq    %r8, %rcx
    addq    %rcx, %rcx
    jnz     found_nonzero               # byte 8 present
    jmp     all_zeros                   # bytes 8..14 all zero: at most 8 characters

# ---- upper half (bytes 8..14) -----------------------------------------
# Invariant: top byte of %rsi is the next character to consume.  After
# each digit, "%rsi == 0" means the upper half is exhausted, so bytes
# 0..7 are loaded and parsing continues at the matching nlr stage.
found_nonzero:
slr0:
    movq    %rsi, %rcx                  # octet 0, units: initialises the accumulator
    shlq    $8, %rsi
    andq    %r8, %rcx
    subq    %r9, %rcx
    shrq    $32, %rcx                   # digit -> bits 24..31
    movq    %rsi, %rdx
    addq    %rdx, %rdx
    jnz     slr0_nlr1
    movq    (%rdi), %rsi
    jmp     nlr0_1
slr0_nlr1:
    movq    %rsi, %rax                  # octet 0, tens (or separator)
    shlq    $8, %rsi
    andq    %r8, %rax
    subq    %r9, %rax
    jb      slr1                        # below '0': separator, already consumed
    mulq    %r10                        # clobbers %rdx (scratch only)
    shrq    $32, %rax
    addq    %rax, %rcx
    movq    %rsi, %rdx
    addq    %rdx, %rdx
    jnz     slr0_nlr2
    movq    (%rdi), %rsi
    jmp     nlr0_2
slr0_nlr2:
    movq    %rsi, %rax                  # octet 0, hundreds (or separator)
    shlq    $8, %rsi
    andq    %r8, %rax
    subq    %r9, %rax
    jb      slr1
    mulq    %r11
    shrq    $32, %rax
    addq    %rax, %rcx
    shlq    $8, %rsi                    # consume the separator; a no-op when the
                                        # upper half is empty (see nlr1_0)
slr1:
    movq    %rsi, %rdx
    addq    %rdx, %rdx
    jnz     slr1_nlr0
    movq    (%rdi), %rsi
    jmp     nlr1_0
slr1_nlr0:
    movq    %rsi, %rax                  # octet 1, units
    shlq    $8, %rsi
    andq    %r8, %rax
    subq    %r9, %rax
    jb      slr2
    shrq    $40, %rax                   # digit -> bits 16..23
    addq    %rax, %rcx
    movq    %rsi, %rdx
    addq    %rdx, %rdx
    jnz     slr1_nlr1
    movq    (%rdi), %rsi
    jmp     nlr1_1                      # FIX: was nlr0_1, which added the next
                                        # digit to octet 0 ("192.168.1.1")
slr1_nlr1:
    movq    %rsi, %rax                  # octet 1, tens (or separator)
    shlq    $8, %rsi
    andq    %r8, %rax
    subq    %r9, %rax
    jb      slr2
    mulq    %r10
    shrq    $40, %rax
    addq    %rax, %rcx
    movq    %rsi, %rdx
    addq    %rdx, %rdx
    jnz     slr1_nlr2
    movq    (%rdi), %rsi
    jmp     nlr1_2
slr1_nlr2:
    movq    %rsi, %rax                  # octet 1, hundreds (or separator)
    shlq    $8, %rsi
    andq    %r8, %rax
    subq    %r9, %rax
    jb      slr2
    mulq    %r11
    shrq    $40, %rax
    addq    %rax, %rcx
    shlq    $8, %rsi                    # consume the separator; a no-op when the
                                        # upper half is empty (see nlr2_0)
slr2:
    movq    %rsi, %rdx
    addq    %rdx, %rdx
    jnz     slr2_nlr0
    movq    (%rdi), %rsi
    jmp     nlr2_0
slr2_nlr0:
    # NOTE(review): in a well-formed address the second octet always ends
    # at or before byte 6, so this stage is not reached; it continues
    # without ever loading bytes 0..7.
    movq    %rsi, %rax
    shlq    $8, %rsi
    andq    %r8, %rax
    subq    %r9, %rax
    jb      nlr3
    shrq    $48, %rax
    addq    %rax, %rcx
    jmp     nlr2_1

# ---- lower half (bytes 0..7) ------------------------------------------
nlr0_0:
    # Entry for 8-character strings (from all_zeros): same work as slr0,
    # but the last character is byte 7 of the lower half.
    movq    %rsi, %rcx
    shlq    $8, %rsi
    andq    %r8, %rcx
    subq    %r9, %rcx
    shrq    $32, %rcx
nlr0_1:
    movq    %rsi, %rax                  # octet 0, tens (or separator)
    shlq    $8, %rsi
    andq    %r8, %rax
    subq    %r9, %rax
    jb      nlr1
    mulq    %r10
    shrq    $32, %rax
    addq    %rax, %rcx
nlr0_2:
    movq    %rsi, %rax                  # octet 0, hundreds (or separator)
    shlq    $8, %rsi
    andq    %r8, %rax
    subq    %r9, %rax
    jb      nlr1
    mulq    %r11
    shrq    $32, %rax
    addq    %rax, %rcx
    shlq    $8, %rsi                    # consume the separator
nlr1:
nlr1_0:
    movq    %rsi, %rax                  # octet 1, units
    shlq    $8, %rsi
    andq    %r8, %rax
    subq    %r9, %rax
    jb      nlr1_sep                    # FIX: was "jb nlr2", which skipped octet 1
                                        # when a 3-digit octet 0 began at byte 8
                                        # and its separator was byte 7
                                        # ("10.20.3.255")
    shrq    $40, %rax
    addq    %rax, %rcx
nlr1_1:
    movq    %rsi, %rax                  # octet 1, tens (or separator)
    shlq    $8, %rsi
    andq    %r8, %rax
    subq    %r9, %rax
    jb      nlr2
    mulq    %r10
    shrq    $40, %rax
    addq    %rax, %rcx
nlr1_2:
    movq    %rsi, %rax                  # octet 1, hundreds (or separator)
    shlq    $8, %rsi
    andq    %r8, %rax
    subq    %r9, %rax
    jb      nlr2
    mulq    %r11
    shrq    $40, %rax
    addq    %rax, %rcx
    shlq    $8, %rsi                    # consume the separator
nlr2:
nlr2_0:
    movq    %rsi, %rax                  # octet 2, units
    shlq    $8, %rsi
    andq    %r8, %rax
    subq    %r9, %rax
    jb      nlr2_sep                    # separator left at byte 7 by a 3-digit
                                        # octet 1 ("255.255.255.255"): skip it
    shrq    $48, %rax                   # digit -> bits 8..15
    addq    %rax, %rcx
nlr2_1:
    movq    %rsi, %rax                  # octet 2, tens (or separator)
    shlq    $8, %rsi
    andq    %r8, %rax
    subq    %r9, %rax
    jb      nlr3
    mulq    %r10
    shrq    $48, %rax
    addq    %rax, %rcx
    movq    %rsi, %rax                  # octet 2, hundreds (or separator)
    shlq    $8, %rsi
    andq    %r8, %rax
    subq    %r9, %rax
    jb      nlr3
    mulq    %r11
    shrq    $48, %rax
    addq    %rax, %rcx
    shlq    $8, %rsi                    # consume the separator
nlr3:
    movq    %rsi, %rax                  # octet 3 (first in the string), units
    shlq    $8, %rsi
    andq    %r8, %rax
    subq    %r9, %rax
    shrq    $56, %rax                   # digit -> bits 0..7
    addq    %rax, %rcx
    movq    %rsi, %rdx
    addq    %rdx, %rdx
    jz      nlr_end                     # start of string reached
    movq    %rsi, %rax                  # tens
    shlq    $8, %rsi
    andq    %r8, %rax
    subq    %r9, %rax
    mulq    %r10
    shrq    $56, %rax
    addq    %rax, %rcx
    movq    %rsi, %rdx
    addq    %rdx, %rdx
    jz      nlr_end
    movq    %rsi, %rax                  # hundreds
    shlq    $8, %rsi
    andq    %r8, %rax
    subq    %r9, %rax
    mulq    %r11
    shrq    $56, %rax
    addq    %rax, %rcx
nlr_end:
    movq    %rcx, %rax
    ret

# Separator-skip helpers.  The separator has already been shifted out;
# retry the units stage only while characters remain, so that malformed
# input cannot spin forever on an empty register.
nlr1_sep:
    testq   %rsi, %rsi
    jnz     nlr1_0
    jmp     nlr_end
nlr2_sep:
    testq   %rsi, %rsi
    jnz     nlr2_0
    jmp     nlr_end

# ---- short strings (bytes 8..14 are all zero) -------------------------
all_zeros:
    movq    (%rdi), %rsi                # rsi = bytes 0..7
    movq    %rsi, %rcx
    andq    %r8, %rcx                   # byte 7 present => 8 characters, e.g.
    jnz     nlr0_0                      # FIX: "10.0.0.1" needs the general parser

    # 7 characters: "d.d.d.d" with the digits at bytes 0, 2, 4 and 6.
    movq    $0x0030003000300030, %r9    # '0' at bytes 0, 2, 4, 6
    subq    %r9, %rsi                   # digits -> values (no borrow for '0'..'9')
    shrq    $8, %r8                     # mask = byte 6
    movq    %rsi, %rax
    andq    %r8, %rax
    shrq    $24, %rax                   # last octet -> bits 24..31
    movq    %rsi, %rcx
    shrq    $16, %r8                    # mask = byte 4
    andq    %r8, %rcx
    shrq    $16, %rcx                   # -> bits 16..23
    orq     %rcx, %rax
    movq    %rsi, %rcx
    shrq    $16, %r8                    # mask = byte 2
    andq    %r8, %rcx
    shrq    $8, %rcx                    # -> bits 8..15
    orq     %rcx, %rax
    shrq    $16, %r8                    # mask = byte 0
    andq    %r8, %rsi                   # first octet stays in bits 0..7
    orq     %rsi, %rax
    ret
The function accepts one argument: a pointer to the string, which is assumed to be zero-padded to 16 bytes. The code is amd64 (x86-64) assembly and, as far as I know, uses no vendor-specific processor features. I have not tested it extensively, but it seems to work. Any advice is appreciated.