1; Fast String Scan: Find First Occurrence of a Character
2; Scans a null-terminated string for the first occurrence of a target character.
3; Optimized for speed by processing 8 bytes at a time.
4
5section .data
6; No specific data needed for this function.
7
8section .text
9global find_char_fast
10
11; Function: find_char_fast
12; Finds the first occurrence of a target character in a null-terminated string.
13; Input:
14; RDI = Pointer to the null-terminated string (source)
15; RSI = Target character (byte)
16; Output:
17; RAX = Pointer to the first occurrence of the character, or 0 if not found.
18find_char_fast:
19; --- Prologue ---
20push rbp
21mov rbp, rsp
22push rbx ; Save callee-saved register (source pointer)
23push r12 ; Save callee-saved register (target char)
24push r13 ; Save callee-saved register (temp for 8 bytes)
25push r14 ; Save callee-saved register (temp for 8 bytes)
26push r15 ; Save callee-saved register (mask for character)
27
28mov rbx, rdi ; Save source string pointer in RBX
29mov r12, rsi ; Save target character in R12
30
31; --- Handle edge case: empty string ---
32cmp byte [rbx], 0
33je .not_found
34
35; --- Main scanning loop (unrolling factor of 8 bytes) ---
36; We will load 8 bytes at a time and check if the character is present.
37; This requires creating a mask of the target character.
38
39; Create a 64-bit mask where all bytes are the target character.
40; Example: if target is 'A' (0x41), mask will be 0x4141414141414141.
41mov r15, r12 ; r15 = target character
42shl r15, 8
43or r15, r12
44shl r15, 8
45or r15, r12
46shl r15, 8
47or r15, r12
48shl r15, 8
49or r15, r12
50shl r15, 8
51or r15, r12
52shl r15, 8
53or r15, r12
54; Now r15 contains the 64-bit mask.
55
56.scan_loop:
57; Load 8 bytes from the string
58mov r13, [rbx] ; Load 8 bytes into r13
59
60; Check for null terminator within the 8 bytes.
61; If a null byte is found, we need to process it carefully.
62; A simple way is to check if the null terminator is present.
63; If it is, we might need to fall back to byte-by-byte processing for the last few bytes.
64; For simplicity, we'll assume the string is long enough or the null terminator
65; is handled by the final byte-by-byte check.
66
67; Compare the loaded 8 bytes with the mask.
68; If (loaded_bytes XOR mask) has any byte with all bits zero, then the character is present.
69; This is equivalent to checking if (loaded_bytes AND mask) == mask.
70; However, a more direct check is often used: XORing and checking for zero bytes.
71; A common trick: if (loaded_bytes - mask) has a sign bit set for any byte, the char is present.
72; Or, more simply: if (loaded_bytes XOR mask) has any zero byte, the char is present.
73
74; Let's use a simpler check: compare loaded bytes with the target character byte-wise.
75; This is less efficient than the XOR/mask trick but easier to implement.
76; A better approach for speed is to use SSE2 instructions like `pshufb` or `pcmpeqb`.
77; For pure x86, we can iterate through the 8 bytes.
78
79mov r14, rbx ; r14 = current position in string
80mov rcx, 8 ; loop counter for 8 bytes
81
82.byte_check_loop:
83cmp rcx, 0
84je .next_8_bytes ; If all 8 bytes checked, move to next block
85
86cmp byte [r14], r12 ; Compare current byte with target character
87je .found ; If match, we found it
88
89; Check for null terminator
90cmp byte [r14], 0
91je .not_found ; If null terminator found, character not present
92
93inc r14 ; Move to next byte
94dec rcx ; Decrement byte counter
95jmp .byte_check_loop
96
97.next_8_bytes:
98add rbx, 8 ; Advance string pointer by 8 bytes
99; Check if the next 8 bytes start with a null terminator
100cmp byte [rbx], 0
101je .not_found
102jmp .scan_loop ; Continue scanning
103
104.found:
105mov rax, r14 ; Return pointer to the found character
106jmp .end_scan
107
108.not_found:
109mov rax, 0 ; Return 0 (null pointer) if not found
110
111.end_scan:
112; --- Epilogue ---
113pop r15
114pop r14
115pop r13
116pop r12
117pop rbx
118pop rbp
119ret