Artifact 51430ede44762358cde4bb01ce4f3f9ab5639536:
- File
freshlib/data/strlib.asm
— part of check-in
[1aa7133438]
at
2013-01-12 20:11:31
on branch FreshLibDev
— Merged with the latest uConfig branch. It updates the uConfig API, and breaks some backward compatibility.
All projects that uses uConfig should be updated now. (Fortunately they are not so many - better now than later).
The latest uConfig API is advanced and provides many new features.
Additionally, StrNormalizePath procedure was introduced in StrLib.asm that can process paths with random separators. (user: johnfound size: 66627) [more...]
; _______________________________________________________________________________________
;|                                                                                       |
;| ..::FreshLib::.. Free, open source. Licensed under "Fresh artistic license."          |
;|_______________________________________________________________________________________|
;
;  Description: OS independent string manipulation library.
;
;  Target OS: Any
;
;  Dependencies: memory.asm; arrays.asm
;
;  Notes:
;
;_________________________________________________________________________________________

module "String library"

; Capacity given to every newly created string (see StrNew) and the lower
; bound enforced by StrSetCapacity.
STR_MINSTRLEN = 127      ; must be N*8-1

; Upper bound for the number of table slots StrNew examines while looking
; for a free one, before it simply appends a new slot.
STR_SEARCH_ONE_ATEMPT = 100

; Header stored in front of the characters of every StrLib string.
struc string {
  .capacity dd ?        ; usable size of the character buffer, in bytes
  .len      dd ?        ; current length of the string, in bytes
  label .data byte      ; first character of the string
}

; The structure is instantiated at a negative address, so that .data lands
; at offset 0. As a result string.capacity and string.len are negative
; offsets that can be applied directly to a pointer to the characters
; (the pointer returned by StrPtr): [ptr+string.len], [ptr+string.capacity].
virtual at -(sizeof.string)
  string string
  sizeof.string = $-string
end virtual


; NumToStr flags

; Bits 16 and 17 select the low level conversion routine in NumToStr.
ntsSigned       = $00000
ntsUnsigned     = $10000
ntsFixedWidth   = $20000

; Radix values; NumToStr reads the radix from byte 1 of the flags.
ntsBin  = $0200
ntsQuad = $0400
ntsOct  = $0800
ntsDec  = $0a00
ntsHex  = $1000


; Global variable, storing parameters of dynamic strings list.
uglobal
  if used InitStrings
    ptrStrTable dd ?      ; StrLib strings array. Contains pointers to the memory allocated for strings.
    if options.Threads
      StrMutex TMutex     ; mutex that allows thread safety of StrLib
    end if
  end if
endg


; < OS independent library functions >

;************************************************************************************
; Allocates memory for string table and allocates memory for strings.
; Start it before any work with strings. (Or better use InitializeAll macro)
; Returns 0 if failed to allocate needed memory.
;************************************************************************************
; InitStrings creates the (initially empty) table of string pointers and
; stores it in [ptrStrTable]. If CreateArray reports an error (CF=1), the
; table pointer is left untouched.
; NOTE(review): the header above promises "returns 0" on failure, but this
; code never sets eax explicitly - confirm what the caller really receives.
if used ptrStrTable
initialize InitStrings
begin
        StrLib = 1

if options.Threads
        stdcall MutexCreate, 0, StrMutex
end if

        stdcall CreateArray, 4                  ; array of 4 byte items (one pointer per string)
        jc      .finish

        mov     [ptrStrTable], eax
        mov     [eax+TArray.lparam], 0          ; lParam is the last allocated handle number

.finish:
; NOTE(review): the release below presumably pairs with an ownership taken
; by MutexCreate - confirm against the mutex implementation.
if options.Threads
        stdcall MutexRelease, StrMutex
end if
        return
endp
end if



;**************************************************************************************
; Frees all memory used for strings library
; Call it before exit of the program or use FinalizeAll macro.
;
; Walks the table from the last slot down to the first one, frees every
; non empty slot, then frees the table itself and clears [ptrStrTable].
;**************************************************************************************
if used InitStrings
finalize FreeStrings
begin
if options.Threads
        stdcall WaitForMutex, StrMutex, -1
end if
        mov     esi, [ptrStrTable]
        mov     ecx, [esi+TArray.count]
        xor     ebx,ebx                         ; ebx = 0: "empty slot" marker and the new table pointer

.freeloop:
        dec     ecx
        js      .endloop                        ; all slots are processed

        cmp     [esi+TArray.array+4*ecx], ebx
        je      .freeloop                       ; the slot is empty - nothing to free

        stdcall FreeMem, [esi+TArray.array+4*ecx]
        jmp     .freeloop

.endloop:
        stdcall FreeMem, esi
        mov     [ptrStrTable], ebx

if options.Threads
        stdcall MutexDestroy, StrMutex
end if
        return
endp
end if



;**************************************************************************************
; Converts a string handle to a pointer to the characters of the string.
;
; Arguments:
;   hString - string handle, or a pointer. Every value below $c0000000 is
;             treated as a pointer and is returned unchanged.
; Returns:
;   CF=0 no error; eax = pointer in memory of the hString
;   CF=1 on error - hString is handle, but is not in the table.*
;        (the index is out of the table, or the slot is empty)
;
; NOTE(review): in the threaded build the procedure relies on WaitForMutex
; preserving eax and on MutexRelease preserving eax and CF - confirm.
;**************************************************************************************
proc StrPtr, .hString
begin
        mov     eax, [.hString]
        cmp     eax, $c0000000
        jb      .pointer

if options.Threads
        stdcall WaitForMutex, StrMutex, -1
end if
        push    ebx

        xor     eax, $c0000000                  ; handle -> index in the table
        mov     ebx, [ptrStrTable]
        cmp     eax, [ebx+TArray.count]
        jae     .notfound

        mov     eax, [ebx+TArray.array+4*eax]
        test    eax, eax
        jz      .notfound                       ; empty (deleted) slot

        add     eax, sizeof.string              ; skip the header; eax points to the characters
        pop     ebx
        clc

.finish:
if options.Threads
        stdcall MutexRelease, StrMutex
end if
        return

.pointer:
        clc
        return

.notfound:
        pop     ebx
        stc
if options.Threads
        jmp     .finish                         ; the mutex still has to be released
else
        return
end if
endp

;**************************************************************************************
; Creates new empty string and returns handle
; Return: handle of the new created string.
;
; The handle is the index of the string in the table, combined with
; $c0000000. A free slot is searched circularly, starting after the slot
; remembered in TArray.lparam and examining at most STR_SEARCH_ONE_ATEMPT
; slots; when none is free, one new slot is appended to the table.
;
; NOTE(review): the results of AddArrayItems and GetMem are not checked and
; string.len of the new string is not written here - presumably GetMem
; returns zero filled memory; confirm.
;**************************************************************************************
proc StrNew
begin
        push    ecx edx esi

if options.Threads
        stdcall WaitForMutex, StrMutex, -1
end if

; Search for first empty place.
        mov     edx, [ptrStrTable]
        mov     ecx, STR_SEARCH_ONE_ATEMPT      ; search only limited count of items
        cmp     ecx, [edx+TArray.count]
        jb      @f
        mov     ecx, [edx+TArray.count]         ; ... but never more than the table contains
@@:
        mov     esi,[edx+TArray.lparam]         ; continue from the place of the previous search
        xor     eax,eax

.search:
        dec     ecx
        js      .notfound

        inc     esi
        cmp     esi, [edx+TArray.count]
        jne     @f
        xor     esi,esi                         ; wrap around to the first slot
@@:
        cmp     [edx+TArray.array+4*esi], eax
        je      .found
        jmp     .search

.notfound:
        mov     [edx+TArray.lparam], esi        ; store the place where the search ends this time.
        mov     esi, [edx+TArray.count]         ; the index of the slot that is to be appended

        stdcall AddArrayItems, edx, 1
        mov     [ptrStrTable], edx              ; AddArrayItems leaves the (possibly moved) array in edx

.found:
        mov     [edx+TArray.lparam], esi

        stdcall GetMem, STR_MINSTRLEN + sizeof.string + 1       ; header + characters + terminator
        mov     [edx+TArray.array+4*esi], eax
        mov     [eax+sizeof.string+string.capacity], STR_MINSTRLEN

        mov     eax, esi
        or      eax, $c0000000                  ; index -> handle

if options.Threads
        stdcall MutexRelease, StrMutex
end if
        pop     esi edx ecx
        return
endp



;**************************************************************************
; Deletes the string if it is possible.
;
; Arguments:
;   hString - handle of the string, or pointer to the characters of a
;             StrLib string. NULL is silently ignored, as well as handles
;             out of the table and pointers not found in the table.
; Returns: nothing. All registers and the flags are preserved.
;**************************************************************************
proc StrDel, .hString
begin
        pushf
        push    eax ecx esi

if options.Threads
        stdcall WaitForMutex, StrMutex, -1
end if

        mov     esi, [ptrStrTable]
        mov     ecx, [.hString]
        jecxz   .finish                         ; NULL - nothing to delete

        cmp     ecx, $c0000000
        jb      .pointer

        xor     ecx, $c0000000                  ; handle -> index in the table
        cmp     ecx, [esi+TArray.count]
        jae     .finish

.free:
        stdcall FreeMem, [esi+TArray.array+4*ecx]
        mov     [esi+TArray.array+4*ecx], 0     ; mark the slot as empty

.finish:
if options.Threads
        stdcall MutexRelease, StrMutex
end if
        pop     esi ecx eax
        popf
        return

.pointer:
; search the pointer in the table.
        lea     eax, [ecx-sizeof.string]        ; the table keeps pointers to the headers, not to the characters
        mov     ecx, [esi+TArray.count]

.search:
        dec     ecx
        js      .finish                         ; not a StrLib string - ignore it

        cmp     [esi+TArray.array+4*ecx], eax
        jne     .search
        jmp     .free
endp



;**************************************************************************
; Duplicates given string, and returns a handle to new one
;
; Arguments:
;   hSource - the string to be copied; it is passed as a source to StrCopy.
; Returns:
;   eax - handle of the new string (StrCopy restores eax on return).
;**************************************************************************
proc StrDup, .hSource
begin
        stdcall StrNew
        stdcall StrCopy, eax, [.hSource]
        return
endp



; Creates new string with the content of a zero terminated string.
;
; Arguments:
;   ptrSource - pointer to zero terminated string.
; Returns:
;   eax - handle of the new string.
; The flags (including the direction flag) are preserved.
proc StrDupMem, .ptrSource
begin
        pushf
        push    ecx edx esi edi

        cld
        mov     esi, [.ptrSource]
        stdcall StrNew
        mov     edx, eax                        ; edx = handle of the new string

; compute the length of the source.
        mov     ecx, [.ptrSource]
.len:
        cmp     byte [ecx], 0
        lea     ecx, [ecx+1]                    ; lea keeps the flags of the cmp
        jne     .len

        sub     ecx, esi
        dec     ecx                             ; ecx = length, the terminator is not counted

        lea     eax, [ecx+4]                    ; room for the dword of zeros, stored below
        stdcall StrSetCapacity, edx, eax
        mov     edi, eax

        mov     [edi+string.len], ecx
        rep movsb

        xor     eax, eax
        stosd                                   ; terminate with 4 zero bytes

        mov     eax, edx
        pop     edi esi edx ecx
        popf
        return
endp



;**************************************************************************
; Arguments:
;  hString - handle or pointer to the string (static or dynamic)
; Returns:
;  CF=0; eax = length of the string in bytes.
;  CF=1; eax = 0 in case, the handle of the string can't be found in the
;                string table or the pointer is NULL.
;
; If pointer is passed to the procedure, it should be dword aligned and
; all bytes of the string including zero terminator to be accessed on
; qword boundary. Although, the zero terminator can be single byte zero.
;
; The performance of the procedure is high for pointers and
; instant for handles (the StrLib created string doesn't need any
; search, because the length is precomputed)
;
; Pointers are scanned 8 bytes at once, so up to 7 bytes after the zero
; terminator may be read (never written).
;**************************************************************************
proc StrLen, .hString           ; proc StrLen [hString]
begin
        mov     eax, [.hString]
        cmp     eax, $c0000000
        jb      .pointer

        stdcall StrPtr, eax
        jc      .error

        mov     eax, [eax+string.len]           ; the length of StrLib strings is precomputed
        clc
        return

.error:
        xor     eax, eax
        stc
        return

.pointer:
        test    eax, eax
        jz      .error                          ; NULL pointer: report the documented error instead of reading address 0

        push    ecx edx esi edi

; align on dword
.byte1:
        test    eax, 3
        jz      .scan

        cmp     byte [eax], 0
        je      .found

        inc     eax
        jmp     .byte1

; Scans two dwords at once. For every dword x the expression
; (x-$01010101) and (not x) and $80808080 is not zero when x contains
; a zero byte.
.scan:
        mov     ecx, [eax]
        mov     edx, [eax+4]

        lea     eax, [eax+8]

        lea     esi, [ecx-$01010101]
        lea     edi, [edx-$01010101]

        not     ecx
        not     edx

        and     esi, ecx
        and     edi, edx

        and     esi, $80808080
        and     edi, $80808080

        or      esi, edi
        jz      .scan

        sub     eax, 9                          ; one byte before the scanned qword; .byteloop increments first.

; byte 0 was found: so search by bytes.
.byteloop:
        lea     eax, [eax+1]
        cmp     byte [eax], 0
        jne     .byteloop

.found:
        sub     eax, [.hString]                 ; address of the terminator -> length
        clc
        pop     edi esi edx ecx
        return
endp



; This procedure calculates the length of zero terminated string and "fixes" [string.len] field.
; StrFixLen should be call when the content of the string is created by call to external to StrLib
; procedures - for example Win32 API functions.
;
; Arguments:
;   hstring - handle of the string (or pointer to the characters of a
;             StrLib string - the length is stored in the header located
;             before the characters).
; Returns: nothing. If the handle is not valid or the pointer is NULL,
;          nothing is changed.
;
proc StrFixLen, .hstring
begin
        push    eax ecx

        stdcall StrPtr, [.hstring]
        jc      .finish                         ; invalid handle: there is no header to fix

        test    eax, eax
        jz      .finish                         ; NULL pointer

        mov     ecx, eax
        stdcall StrLen, eax                     ; eax is a pointer here, so the string is really scanned.
        mov     [ecx+string.len], eax

.finish:
        pop     ecx eax
        return
endp



;***************************************************************************
; If the hString is larger than length - do nothing
; If the hString is smaller than length -> set the length of string to length
; returns pointer to the new (old) string
;
; Arguments:
;   hString - string handle. /not pointer!/
;   capacity - new string length.
; Returns:
;   eax: pointer to the string.
;   CF: error flag. If 1 the pointer to the string is returned, but
;       the capacity is not changed.
;
; Notes:
;   The requested capacity is never less than STR_MINSTRLEN. When the
;   string has to grow, the block is resized to twice the requested
;   capacity (rounded to 8 bytes), so the following small growths are free.
;   If the handle is out of the table, or the string was deleted, CF=1 is
;   returned and nothing is changed; eax is not a valid pointer then.
;
; NOTE(review): in the threaded build the returned eax and CF have to
; survive MutexRelease - confirm that it preserves them.
;***************************************************************************
proc StrSetCapacity, .hString, .capacity
begin
        mov     eax, [.hString]
        cmp     eax, $c0000000
        jb      .exit                           ; CF=1 - it is error

        push    ebx ecx esi

        mov     esi, eax
        xor     esi, $c0000000                  ; esi = index in the table

if options.Threads
        stdcall WaitForMutex, StrMutex, -1
end if

        mov     ebx, [ptrStrTable]
        cmp     esi, [ebx+TArray.count]
        jae     .pointer

        mov     eax, [ebx+TArray.array+4*esi]   ; pointer to the string.
        test    eax, eax
        jz      .pointer                        ; the string was deleted: fail the same way StrPtr does.

        mov     ecx, [.capacity]
        cmp     ecx, STR_MINSTRLEN
        jge     @f                              ; signed compare: values >= $80000000 are replaced as well
        mov     ecx, STR_MINSTRLEN
@@:
        cmp     [eax+sizeof.string+string.capacity], ecx
        jae     .sizeok

        shl     ecx, 1                          ; grow with reserve
        add     ecx, sizeof.string+4+7
        and     cl, $f8                         ; align the size to 8 bytes

        stdcall ResizeMem, eax, ecx
        jc      .error

        lea     ecx, [ecx-(sizeof.string+4)]    ; the header and 4 bytes for the terminator are not capacity
        mov     [ebx+TArray.array+4*esi], eax
        mov     [eax+sizeof.string+string.capacity], ecx

.sizeok:
        add     eax, sizeof.string              ; eax = pointer to the characters; the addition leaves CF=0

.finish:
if options.Threads
        stdcall MutexRelease, StrMutex
end if
        pop     esi ecx ebx

.exit:
        return

.error:
        int3                                    ; out of memory: stop in the debugger
        add     eax, sizeof.string

.pointer:
        stc
        jmp     .finish
endp



;***************************************************************************************
; Copies source to destination string.
; Arguments:
;  dest - destination string (handle only)
;  source -  source string (handle or pointer)
; Returns: nothing
;
; The zero terminator is copied together with the characters.
; The direction flag is expected to be clear on entry.
;***************************************************************************************
proc StrCopy, .dest, .source
begin
        push    esi edi eax ecx

        stdcall StrLen, [.source]
        mov     ecx, eax

        stdcall StrSetCapacity, [.dest], ecx
        mov     edi, eax                        ; mov does not change CF, returned by StrSetCapacity
        jc      .error

        stdcall StrPtr, [.source]
        mov     esi, eax

        mov     [edi+string.len], ecx

        inc     ecx                             ; copy the terminator as well
        mov     eax, ecx
        shr     ecx, 2
        rep movsd                               ; whole dwords first...
        mov     ecx, eax
        and     ecx, 3
        rep movsb                               ; ...then the remaining 0..3 bytes
        clc

.finish:
        pop     ecx eax edi esi
        return

.error:
        int3                                    ; the destination is not a valid handle: stop in the debugger
        jmp     .finish
endp



; Converts one character to lower case.
;
; Arguments:
;   char - character code.
; Returns:
;   eax - the character with 'A'..'Z' converted to 'a'..'z'.
;         All other values are returned unchanged.
proc CharLCase, .char
begin
        mov     eax, [.char]
        cmp     eax, 'A'
        jb      .end
        cmp     eax, 'Z'
        ja      .end

        add     eax, 'a'-'A'
.end:
        return
endp



;***************************************************************************************
; Compares two strings for greater, equal or less.
; Returns eax = 0 if the strings are equal.
;         eax = 1 if .str1 is greater than .str2
;         eax = -1 if .str1 is less than .str2
;
; Arguments:
;   str1, str2     - the strings to be compared (handles or pointers).
;   fCaseSensitive - if 0, every decoded character is passed through
;                    CharLCase before the comparison.
;
; The strings are compared character by character, as decoded by
; DecodeUtf8; the values are compared as unsigned numbers. If one of the
; strings is a prefix of the other, the shorter string is the smaller one.
; All registers except eax are preserved.
;***************************************************************************************
proc StrCompSort2, .str1, .str2, .fCaseSensitive
begin
        push    ebx ecx edx esi edi             ; edx is used for the result of DecodeUtf8, so it is saved as well

        mov     eax, [.str1]
        mov     ecx, [.str2]

        cmp     eax, ecx
        je      .equal                          ; the same handle/pointer - no need to compare

        stdcall StrPtr, [.str1]
        mov     esi,eax

        stdcall StrPtr, [.str2]
        mov     edi,eax

.cmp_loop:
; next character from str1 -> ecx
        stdcall DecodeUtf8, [esi]
        add     esi, edx                        ; edx is used as the size of the decoded character

        cmp     [.fCaseSensitive], 0
        jne     @f
        stdcall CharLCase, eax
@@:
        mov     ecx, eax

; next character from str2 -> eax
        stdcall DecodeUtf8, [edi]
        add     edi, edx

        cmp     [.fCaseSensitive], 0
        jne     @f
        stdcall CharLCase, eax
@@:
        test    ecx, ecx
        jz      .endstr

        test    eax, eax
        jz      .endstr

        cmp     ecx, eax
        je      .cmp_loop
        ja      .greater
        jmp     .less

.endstr:
; at least one of the strings ended; 0 is smaller than every other character.
        cmp     ecx, eax
        ja      .greater
        jb      .less

.equal:
        xor     eax, eax

.finish:
        pop     edi esi edx ecx ebx
        return

.greater:
        mov     eax, 1
        jmp     .finish

.less:
        mov     eax, -1
        jmp     .finish
endp



;***************************************************************************************
; Compares two strings - case sensitive.
; Returns CARRY = 1 if the strings are equal.
; Returns CARRY = 0 if the strings are different.
;
; As long as this function uses StrLen, it will be very fast on handles and relatively
; slow on pointers.
;
; NULL is equal only to NULL. All registers are preserved.
; The direction flag is expected to be clear on entry.
;***************************************************************************************
proc StrCompCase, .str1, .str2
begin
        push    eax ecx esi edi

        mov     eax, [.str1]
        mov     ecx, [.str2]

        cmp     eax, ecx
        je      .equal                          ; the same handle/pointer

        test    eax, eax
        jz      .noteq

        test    ecx, ecx
        jz      .noteq

; strings with different lengths can not be equal.
        stdcall StrLen, eax
        push    eax
        stdcall StrLen, ecx

        pop     ecx
        cmp     eax, ecx
        jne     .noteq

        stdcall StrPtr, [.str1]
        mov     esi,eax
        stdcall StrPtr, [.str2]
        mov     edi,eax

; When the counter is 0 the "repe cmps" instructions do not execute, but
; then ZF=1 is left by the preceding shr/and, so the jumps are not taken.
        mov     eax, ecx
        shr     ecx, 2
        repe cmpsd
        jne     .noteq

        mov     ecx, eax
        and     ecx, 3
        repe cmpsb
        jne     .noteq

.equal:
        stc
        pop     edi esi ecx eax
        return

.noteq:
        clc
        pop     edi esi ecx eax
        return
endp



;***************************************************************************************
; Compares two strings - case NOT sensitive.
; Returns CARRY = 1 if the strings are equal.
; Returns CARRY = 0 if the strings are different.
;
; relatively slow, especially on equal strings, passed as pointers - this is the worst
; case. The nontrivial best case is "strings with different lengths passed as handles."
;
; Before the comparison, bit 5 is set in every byte that has bit 6 set. This
; maps 'A'..'Z' to 'a'..'z', but it also makes some other pairs of bytes
; equal (for example '@' and '`', '[' and '{') and it processes the bytes
; above $7f in the same way.
;
; NULL is equal only to NULL. All registers are preserved.
;***************************************************************************************
proc StrCompNoCase, .str1, .str2
begin
        push    eax ebx ecx edx esi edi

        mov     eax, [.str1]
        mov     ecx, [.str2]

        cmp     eax, ecx
        je      .equal                          ; the same handle/pointer

        test    eax, eax
        jz      .noteq

        test    ecx, ecx
        jz      .noteq

; strings with different lengths can not be equal.
        stdcall StrLen, eax
        push    eax
        stdcall StrLen, ecx

        pop     ecx
        cmp     eax, ecx
        jne     .noteq

        stdcall StrPtr, [.str1]
        mov     esi,eax
        stdcall StrPtr, [.str2]
        mov     edi,eax

        mov     ebx, ecx
        shr     ecx, 2                          ; ecx = count of the whole dwords
        and     ebx, 3                          ; ebx = count of the remaining bytes

.dword:
        dec     ecx
        js      .byte

        mov     eax, [esi]
        mov     edx, [edi]

        and     eax, $40404040                  ; bit 6 of every byte...
        and     edx, $40404040
        shr     eax, 1                          ; ...moved to bit 5...
        shr     edx, 1
        or      eax, [esi]                      ; ...and merged with the original bytes.
        or      edx, [edi]

        lea     esi, [esi+4]
        lea     edi, [edi+4]

        cmp     eax, edx
        jne     .noteq
        jmp     .dword

.byte:
        dec     ebx
        js      .equal

        mov     al, [esi]                       ; al = byte from str1
        mov     ah, [edi]                       ; ah = byte from str2
        and     eax, $ffff
        mov     edx, eax
        and     eax, $4040                      ; the same trick, for both bytes at once
        shr     eax, 1
        or      eax, edx

        inc     esi
        inc     edi

        cmp     al, ah
        je      .byte

.noteq:
        clc
        pop     edi esi edx ecx ebx eax
        return

.equal:
        stc
        pop     edi esi edx ecx ebx eax
        return
endp



;**********************************************************
;  Creates string and assigns it to variable. If variable
;  already contains string handle, the old string will be
;  used.
;  Arguments:
;    [ptrHString] - variable containing string handle.
;    ptrSource - pointer to the source for string.
;                If it is NULL, the string is only cleared.
;  Returns: nothing. All registers are preserved.
;           If the variable contains a value that is not a valid
;           string, nothing is written through it.
;**********************************************************
proc SetString, .ptrHString, .ptrSource
begin
        push    eax esi

        mov     esi, [.ptrHString]
        mov     eax, [esi]
        test    eax, eax
        jnz     @f

        stdcall StrNew                          ; the variable is empty - create new string.
@@:
        mov     [esi], eax

        stdcall StrPtr, eax
        jc      .finish                         ; stale/invalid handle: eax is not a pointer, so don't write through it.

; make the string empty.
        mov     dword [eax], 0
        mov     [eax+string.len], 0

        cmp     [.ptrSource], 0
        je      .finish

        stdcall StrCopy, [esi], [.ptrSource]

.finish:
        pop     esi eax
        return
endp



;**********************************************************************************
; StrCat appends one string to another
; Arguments:
;   dest - destination string (handle only)
;   source - source string
; Returns: nothing. All registers are preserved.
;
; Notes:
;   If [.dest] is not a valid handle (StrSetCapacity returns CF=1), nothing
;   is appended.
;   The source may be the same string as the destination: only the
;   characters are copied and the terminator is stored separately, because
;   in this case the first copied byte overwrites the terminator of the
;   source.
;   The direction flag is expected to be clear on entry.
;**********************************************************************************
proc StrCat, .dest, .source
begin
        push    eax ebx ecx esi edi

        stdcall StrLen, [.dest]
        mov     ebx, eax                        ; store dest length in ebx

        stdcall StrLen, [.source]
        mov     esi, eax                        ; esi = source length
        lea     ecx, [eax+ebx]                  ; ecx = length of the result

        stdcall StrSetCapacity, [.dest], ecx
        jc      .finish                         ; eax does not point to StrLib string - don't touch the memory.

        mov     [eax+string.len], ecx
        lea     edi, [eax+ebx]                  ; edi = the end of the destination

        stdcall StrPtr, [.source]               ; asked after the resize - the source may be the (moved) destination.
        mov     ecx, esi                        ; only the characters; the terminator is stored below.
        mov     esi, eax

        mov     ebx, ecx
        shr     ecx, 2
        rep movsd
        mov     ecx, ebx
        and     ecx, 3
        rep movsb

        mov     byte [edi], 0                   ; the terminator

.finish:
        pop     edi esi ecx ebx eax
        return
endp



;**********************************************************************************
; StrCharPos returns a pointer to the first occurence of a given char
;   in specified string
; Arguments:
;   Char - char to look for (only the low byte is used)
;   hString -  string to search
; Returns: a pointer to the char in source, or NULL if char doesn't occur
;   in given string
;
; The zero terminator itself is never found - searching for 0 returns NULL.
;**********************************************************************************
proc StrCharPos, .hString, .char
begin
        push    esi

        stdcall StrPtr, [.hString]
        mov     esi,eax

        mov     ah, byte [.char]                ; ah = the char to look for

.search:
        mov     al,[esi]
        inc     esi
        or      al,al
        jz      .not_found                      ; end of the string

        cmp     al,ah
        jne     .search

        mov     eax, esi
        dec     eax                             ; esi is already one byte after the found char
        pop     esi
        return

.not_found:
        xor     eax,eax
        pop     esi
        return
endp



;**********************************************************************************
; StrPos returns a pointer to the first occurence of a pattern string
;   in another string
; Arguments:
;   hPattern - 'pattern' string
;   hString -  string to search
; Returns: a pointer to the pattern string in source , or NULL if pattern string
;   doesn't occur in the string to search
;
; Notes:
;   Empty pattern is never found (NULL is returned).
;   All registers except eax are preserved.
;   The direction flag is expected to be clear on entry.
;**********************************************************************************
proc StrPos, .hString, .hPattern
begin
        push    ebx ecx edx esi edi     ; esp = esp -20

        mov     esi,[.hPattern]
        mov     edi,[.hString]

        stdcall StrLen, edi
        mov     ebx,eax                 ; now ebx holds lenght of the string to search

        stdcall StrLen, esi
        mov     edx,eax                 ; now edx holds length of the pattern string

        test    edx, edx
        jz      .not_found              ; empty pattern: without this check the compare below runs with count -1.

        cmp     edx, ebx
        ja      .not_found              ; if the pattern is longer than the string

        stdcall StrPtr, esi
        mov     esi,eax                 ; put pointer to the pattern str in esi

        stdcall StrPtr,edi
        mov     edi,eax                 ; put pointer to the search str in edi

        lodsb                           ; load first character of the pattern
        mov     ecx,ebx
        mov     ebx,edx                 ; put str_len(pattern)-1 in ebx
        dec     ebx
        sub     ecx, ebx                ; there is no need to search to the end, but only to len(string)-len(pattern)+1

.search:
; If ecx=0 here, repne scasb does nothing and ZF=0 is still left by the
; unsuccessful repe cmpsb, so the search ends with "not found".
        repne scasb
        jne     .not_found

        push    edi esi ecx

        or      ebx,ebx                 ; ebx==0 means that we were searching for one
        jz      .got_it                 ; character. We found it, so we stop.

        mov     ecx,ebx
        repe cmpsb                      ; compare the rest of the pattern
        jne     .not_match

.got_it:
        pop     ecx esi edi
        dec     edi                     ; scasb left edi one byte after the first matching character
        mov     eax,edi

.ret:
        pop     edi esi edx ecx ebx
        return

.not_match:
        pop     ecx esi edi             ; continue the search from the next character
        jmp     .search

.not_found:
        xor     eax,eax
        jmp     .ret
endp



; Copies part of the source string to the destination string.
;
; Arguments:
;   dest   - destination string (handle only)
;   source - source string (handle or pointer)
;   pos    - offset in the source, where the copied part begins.
;   len    - max count of bytes to be copied; it is limited to the end of
;            the source, so -1 means "to the end of the string".
; Returns: nothing. All registers are preserved.
;
; Notes:
;   If [.pos] is at or after the end of the source, the destination
;   becomes empty string.
;   If [.dest] is not valid handle, nothing is written.
;   The result is always zero terminated: it is padded with zeros to the
;   next dword boundary and at least one zero is written.
;   The direction flag is expected to be clear on entry.
proc StrCopyPart, .dest, .source, .pos, .len
begin
        push    eax ecx esi edi

        stdcall StrPtr, [.source]
        mov     esi, eax

        stdcall StrLen, [.source]
        mov     ecx, eax

        mov     eax, [.pos]
        cmp     eax, ecx
        jae     .cleardest

        sub     ecx, [.pos]             ; ecx = count of the bytes from [.pos] to the end of the source
        mov     eax, [.len]

; ecx = min(ecx, eax)
        sub     eax, ecx
        sbb     edi, edi                ; edi = -1 if [.len] < ecx, else 0
        and     edi, eax                ; edi = [.len]-ecx or 0
        add     ecx, edi

        add     esi, [.pos]

        stdcall StrSetCapacity, [.dest], ecx
        jc      .finish

        mov     edi, eax
        mov     [edi+string.len], ecx

        push    ecx
        shr     ecx, 2
        rep movsd
        pop     ecx
        and     ecx, 3
        rep movsb

; zero terminate and pad to dword boundary.
        lea     ecx, [edi+3]
        and     cl, $fc
        sub     ecx, edi
        jnz     @f
        inc     ecx                     ; the end is already aligned: the terminator still has to be written.
@@:
        xor     eax, eax
        rep stosb

.finish:
        pop     edi esi ecx eax
        return

.cleardest:
        stdcall StrSetCapacity, [.dest], STR_MINSTRLEN
        jc      .finish                 ; not valid handle: eax is not pointer to StrLib string.

        mov     [eax+string.len], 0
        mov     dword [eax], 0
        jmp     .finish
endp



;**********************************************************************************
; StrExtract copies the part of [string] from [index] with lenght in [len]
; Returns handle to new created string.
;
; Arguments:
;   string - source string (handle or pointer)
;   pos    - offset in the source, where the extracted part begins.
;   len    - max count of bytes to extract (see StrCopyPart)
;**********************************************************************************
proc StrExtract, .string, .pos, .len
begin
        stdcall StrNew
        stdcall StrCopyPart, eax, [.string], [.pos], [.len]    ; StrCopyPart preserves eax - the new handle.
        return
endp



;__________________________________________________________________________________
; Splits the string on two strings, at position [.pos]
; Arguments:
;   .pString - pointer to string to be splitted.
;   .pos     - position where to split the string.
; Returns:
;   eax - handle to the new created string with second part of the string.
;         the original string does not reallocate memory and it's capacity
;         and the pointer will remains the same.
;__________________________________________________________________________________
proc StrSplit, .hString, .pos
begin
        stdcall StrExtract, [.hString], [.pos], -1      ; eax = new string with the tail
        stdcall StrTrim, [.hString], [.pos]             ; StrTrim preserves eax
        return
endp



;__________________________________________________________________________________
; Trims the string at position [.pos]
; Arguments:
;   .hString - the string to be trimmed.
;   .pos     - the new length of the string.
; Returns: nothing. All registers are preserved.
;
; If the string is not longer than [.pos], nothing is changed. Otherwise
; the length is set to [.pos] and zeros are written from this position to
; the next dword boundary (at least one byte).
; The procedure clears the direction flag.
;__________________________________________________________________________________
proc StrTrim, .hString, .pos
begin
        push    eax ecx edi

        mov     ecx, [.pos]
        stdcall StrLen, [.hString]
        cmp     eax, ecx
        jbe     .endtrim

        stdcall StrPtr, [.hString]
        mov     [eax+string.len], ecx   ; new length of the source string.

        add     eax, ecx
        mov     edi, eax                ; edi = the new end of the string

        lea     ecx, [eax+3]
        and     cl, $fc
        sub     ecx, eax                ; ecx = count of the bytes to the next dword boundary
        jnz     @f
        inc     ecx                     ; already aligned - write one zero anyway.
@@:
        xor     eax, eax
        cld
        rep stosb

.endtrim:
        pop     edi ecx eax
        return
endp



;__________________________________________________________________________________
;  StrInsert inserts one string into another at specified pos
;  Arguments:
;    dest - destination where the source will be inserted.
;    source -  string to insert
;    pos  - where to insert.
;  Returns:
;    nothing. All registers are preserved.
;
;  The destination is split at [.pos], then the source and the split tail
;  are appended and the temporary string with the tail is deleted.
;__________________________________________________________________________________
proc StrInsert, .dest, .source, .pos
begin
        push    eax

        stdcall StrSplit, [.dest], [.pos]
        push    eax eax                         ; the tail: arguments, prepared for the two calls below.

        stdcall StrCat, [.dest], [.source]
        stdcall StrCat, [.dest]                 ; source from the stack.
        stdcall StrDel                          ; from the stack.

        pop     eax
        return
endp



; TODO:
; String case functions are giving weird results in linux, so
; here are two functons I wrote some time ago.
; I have tested the following functions in win32 and worked well
; Perhaps it is time to fully support UTF encoded strings.
; These functions here are faster (20-25%), but the results are
; exactly the same as the strlib ones.
;                                                  pelaillo

; -----------------------------------------------
; str_ucase:
;   Author: pelaillo
;   Date:   Jan. 16, 2002
;   Converts also accented characters: ÑÚ <--> ñú
;
; Clears bit 5 of every byte that has bit 6 set. The string is processed
; dword by dword, until a dword containing zero byte is processed, so the
; bytes located after the terminator in the last dword can be changed too.
; The result of StrPtr is not checked.
; -----------------------------------------------
proc StrUCase2, .hString
begin
        push    eax edx edi

        stdcall StrPtr, [.hString]
        mov     edi, eax

.str_ucase:
        mov     eax, [edi]
        mov     edx, eax
        and     edx, 40404040h
        ror     edx, 1                  ; bit 6 -> bit 5
        xor     edx, -1
        and     eax, edx                ; clear bit 5 where bit 6 is set
        mov     [edi], eax
        add     edi, 4

; check whether the processed dword contains zero byte.
        lea     edx, [eax-01010101h]
        xor     eax, edx
        and     eax, 80808080h
        jz      .str_ucase
        and     eax, edx
        jz      .str_ucase

        pop     edi edx eax
        return
endp



; -----------------------------------------------
; str_lcase:
;   Author: pelaillo
;   Date:   Jan. 16, 2002
;   Converts also accented characters: ÑÚ <--> ñú
;
; Sets bit 5 of every byte that has bit 6 set. The string is processed
; dword by dword, until a dword containing zero byte is processed, so the
; bytes located after the terminator in the last dword can be changed too.
; The result of StrPtr is not checked.
; -----------------------------------------------
proc StrLCase2, .hString
begin
        push    eax edx edi

        stdcall StrPtr, [.hString]
        mov     edi, eax

.str_lcase:
        mov     eax, [edi]
        mov     edx, eax
        and     edx, 40404040h
        ror     edx, 1                  ; bit 6 -> bit 5
        or      eax, edx                ; set bit 5 where bit 6 is set
        mov     [edi], eax
        add     edi, 4

; check whether the processed dword contains zero byte.
        lea     edx, [eax-01010101h]
        xor     eax, edx
        and     eax, 80808080h
        jz      .str_lcase
        and     eax, edx
        jz      .str_lcase

        pop     edi edx eax
        return
endp



;**********************************************************************************
; Converts strings to Lower Case
;
; Arguments:
;   hString - the string to be converted (handle or pointer).
; Returns: nothing. All registers are preserved.
;
; Exactly StrLen bytes are processed - not a byte after the end of the
; string is touched. In every byte that has bit 6 set, bit 5 is set. This
; converts 'A'..'Z' to 'a'..'z', but changes some other characters as well
; (for example '@' becomes '`'), including the bytes above $7f.
; Invalid handles and NULL pointers are ignored.
;**********************************************************************************
proc StrLCase, .hString
begin
        push    eax ecx edx edi

        stdcall StrPtr, [.hString]
        jc      .finish                 ; invalid handle

        test    eax, eax
        jz      .finish                 ; NULL pointer

        mov     edi, eax

        stdcall StrLen, [.hString]
        jc      .finish

        mov     edx, eax                ; edx = length of the string
        mov     ecx, eax
        shr     ecx, 2                  ; ecx = count of the whole dwords
        jz      .tail

.dword:
        mov     eax, [edi]
        and     eax, $40404040
        shr     eax, 1                  ; bit 6 -> bit 5 (inside of the same byte)
        or      [edi], eax
        add     edi, 4
        dec     ecx
        jnz     .dword

.tail:
        and     edx, 3                  ; the remaining 0..3 bytes
        jz      .finish

.byte:
        mov     al, [edi]
        and     al, $40
        shr     al, 1
        or      byte [edi], al
        inc     edi
        dec     edx
        jnz     .byte

.finish:
        pop     edi edx ecx eax
        return
endp



;**********************************************************************************
; Converts strings to Upper Case
; First parameter = String to Convert to upper case
;
; Returns: nothing. All registers are preserved.
;
; Exactly StrLen bytes are processed - not a byte after the end of the
; string is touched. In every byte that has bit 6 set, bit 5 is cleared.
; This converts 'a'..'z' to 'A'..'Z', but changes some other characters as
; well (for example '`' becomes '@'), including the bytes above $7f.
; Invalid handles and NULL pointers are ignored.
;**********************************************************************************
proc StrUCase, .hString
begin
        push    eax ecx edx edi

        stdcall StrPtr, [.hString]
        jc      .finish                 ; invalid handle

        test    eax, eax
        jz      .finish                 ; NULL pointer

        mov     edi, eax

        stdcall StrLen, [.hString]
        jc      .finish

        mov     edx, eax                ; edx = length of the string
        mov     ecx, eax
        shr     ecx, 2                  ; ecx = count of the whole dwords
        jz      .tail

.dword:
        mov     eax, [edi]
        and     eax, $40404040
        shr     eax, 1                  ; bit 6 -> bit 5 (inside of the same byte)
        not     eax
        and     [edi], eax
        add     edi, 4
        dec     ecx
        jnz     .dword

.tail:
        and     edx, 3                  ; the remaining 0..3 bytes
        jz      .finish

.byte:
        mov     al, [edi]
        and     al, $40
        shr     al, 1
        not     al
        and     byte [edi], al
        inc     edi
        dec     edx
        jnz     .byte

.finish:
        pop     edi edx ecx eax
        return
endp



;**********************************************************************************
; _NumToStr converts the number in eax to the string in any radix approx. [2..26]
; Arguments:
;   [edi] - pointer to the string buffer
;   ecx - radix
;   eax - number to convert.
; There is no parameter check, so be careful.
; returns: edi points to the end of a converted number
;
; _NumToStr writes '-' for the negative numbers and then continues
; directly in _NumToStrU - the two procedures must stay adjacent.
; eax and edx are changed. The direction flag is expected to be clear.
;**********************************************************************************
proc _NumToStr
begin
        test    eax,eax
        jns     _NumToStrU
        neg     eax
        mov     byte [edi],"-"
        inc     edi
endp                                    ; no return here: falls through to _NumToStrU


; Converts unsigned number; the arguments are the same as of _NumToStr.
; The procedure is recursive: one level for every digit of the result.
proc _NumToStrU
begin
        cmp     eax,ecx
        jb      .lessA                  ; the last (most significant) digit
        xor     edx,edx
        div     ecx
        push    edx                     ; the remainder is the current digit...
        call    _NumToStrU              ; ...written after all more significant digits.
        pop     eax

.lessA:
; digit 0..9 -> '0'..'9'; digit 10.. -> 'A'..
        cmp     al, 10
        sbb     al, 69h
        das
        stosb
        return
endp



;*****************************************************
; NumToStrF:
;   Converts signed integer value to string.
; NumToStrUF:
;   Converts unsigned integer value to string.
;
; edi - pointer to string buffer
; eax - Number to convert
; ecx - radix from 2 to $ff
; esi - length of the number in chars
;
; returns: edi - pointer to the end of converted num
;
; Note: Don't use 1 as radix.
;
; Exactly esi chars are written: the number is padded with leading zeros
; and the most significant digits are lost if the width is not enough.
; For negative numbers the '-' sign takes the first of the esi chars.
; esi is preserved; eax and edx are changed. esi=0 is not supported.
;*****************************************************
proc _NumToStrF
begin
        test    eax,eax
        jns     _NumToStrUF
        neg     eax
        mov     byte [edi],'-'
        push    esi
        add     edi, esi                ; the end of the field: start + width
        push    edi                     ; returned in edi by the common epilogue
        dec     edi                     ; edi = position of the last digit
        dec     esi                     ; the sign takes one char...
        jz      _NumToStrUF.done        ; ...and if it was the only one, there are no digits to write.
        jmp     _NumToStrUF.loopc
endp


proc _NumToStrUF
begin
        push    esi
        add     edi, esi
        push    edi                     ; the end of the field
        dec     edi                     ; edi = position of the last digit

.loopc:
; the digits are written from the last to the first.
        xor     edx,edx
        div     ecx
        xchg    al,dl                   ; al = the digit; dl keeps the low byte of the quotient
        cmp     al,$0a
        sbb     al,$69
        das                             ; digit -> ASCII
        mov     [edi],al
        dec     edi
        xchg    al,dl                   ; restore the quotient in eax
        dec     esi
        jnz     .loopc

.done:
        pop     edi
        pop     esi
        return
endp



;***********************************************************
; NumToStr - converts number to any radix.
;   num - number to convert
;   flags:
;     byte 0 - number of digits if ntsFixedWidth is set.
;     byte 1 - contains radix for the convertion.
;     byte 2,3 - flags.
; Returns:
;   eax - handle of the new created string, containing the number.
;
; The radix is not checked: values 0 and 1 must not be used.
; All registers except eax are preserved.
;***********************************************************
proc NumToStr, .num, .flags
begin
        push    ebx ecx edx esi edi

        stdcall StrNew
        push    eax                     ; the handle - it is the result.

; 40 bytes are enough for every not fixed width conversion; the fixed width
; (up to 255 chars) is added, so the conversion never overflows the buffer.
        movzx   ecx, byte [.flags]
        add     ecx, 40
        stdcall StrSetCapacity, eax, ecx
        mov     edi, eax
        push    eax                     ; pointer for the length.

; determine which conversion func to use
        movzx   eax, byte [.flags+2]    ; signed/fixed
        and     eax, (ntsUnsigned or ntsFixedWidth) shr 16
        mov     ebx, [.NumToStrFunc+4*eax]

        movzx   ecx, byte [.flags+1]    ; load radix into ecx
        movzx   esi, byte [.flags]      ; the width; it is used only by the fixed width routines.
        mov     eax, [.num]
        call    ebx                     ; call low-level convertion routine
        mov     dword [edi], 0          ; the terminator

        pop     eax
        sub     edi, eax                ; the end - the begin = the length
        mov     [eax+string.len], edi

        pop     eax
        pop     edi esi edx ecx ebx
        return

; indexed by: bit 0 - unsigned; bit 1 - fixed width.
.NumToStrFunc dd _NumToStr, _NumToStrU, _NumToStrF, _NumToStrUF

endp



;-------------------------------------------------------
; function StrToNum
;   Converts specified string into a number
;
; Arguments:
;   hString - handle/pointer of the string containing
;     number to convert. It doesn't have to be ended by
;     NULL, any other character will stop conversion.
;     Number to convert must be decimal.
;
; Return:
;   CF=0; eax - converted number
;         edx - offset to the byte where convertion ended.
;   CF=1; eax = -1 - the string does not begin with a digit.
;         edx is not changed in this case.
;
; Note: in case of failture (first char of given pointer
;       isn't a number) function returns -1.
;       There is no overflow check.
;-------------------------------------------------------
proc StrToNum, .hString
begin
        push    ebx esi edi
        xor     ebx,ebx                 ; ebx will store our number

        stdcall StrPtr, [.hString]
        mov     edi, eax                ; edi = the begin of the string, needed for the offset in edx
        mov     esi,eax
        xor     eax,eax                 ; the high bytes of eax stay 0 during the whole loop
        mov     al,[esi]
        cmp     al,'0'
        jb      .error
        cmp     al,'9'
        jbe     .digit
        jmp     .error

.digit:
        sub     al,'0'
        add     ebx,eax
        inc     esi
        mov     al,[esi]
        cmp     al,'0'
        jb      .finish
        cmp     al,'9'
        ja      .finish
        mov     edx,ebx                 ; multiply ebx by 10
        shl     ebx,3
        add     ebx,edx
        add     ebx,edx
        jmp     .digit

.finish:
        mov     eax, ebx
        mov     edx, esi
        sub     edx, edi
        clc
        pop     edi esi ebx
        return

.error:
        mov     eax, -1
        stc
        pop     edi esi ebx
        return
endp



; Converts a string to dword integer, using FASM number formats.
;
; The supported formats are: decimal; hexadecimal with '$' or '0x' prefix
; or 'h' postfix; octal with 'o' postfix; binary with 'b' postfix. Leading
; '-' negates the number. The letters (hex digits and the postfixes) are
; accepted in both lower and upper case.
; The number ends on zero byte or on the first char, lower than '0' and
; different than '$' and '-', or in the range ':'..'@'.
; There is no overflow check.
;
; Return:
;   CF=0; eax = converted number
;   CF=1; eax = 0 on invalid number
proc StrToNumEx, .hstring
.sign dd ?
begin
        push    ebx edx esi edi

        stdcall StrPtr, [.hstring]
        mov     esi, eax
        mov     edi, eax

; search the end:
.end_loop:
        lodsb
        test    al, al
        jz      .end_found

        cmp     al, '$'
        je      .end_loop
        cmp     al, '-'
        je      .end_loop

        cmp     al, '0'
        jb      .end_found
        cmp     al, '9'
        jbe     .end_loop

        cmp     al, '@'
        ja      .end_loop

.end_found:
        dec     esi
        xchg    esi, edi                ; esi = begin of the number; edi = end of the number.

        cmp     esi, edi
        je      .invalid_number         ; empty string

; sign?
        mov     [.sign], 0
        cmp     byte [esi], '-'
        jne     .radix

        mov     [.sign], -1
        inc     esi

; determine what is the radix.
.radix:
        cmp     byte [esi], '$'
        je      .hex

        cmp     word [esi], '0x'
        jne     .postfix

        inc     esi

.hex:
        inc     esi
        mov     edx, 16
        jmp     .decode

.postfix:
; search for 'h' or 'b' or 'o'
        mov     al, [edi-1]
        or      al, $20                 ; to lower case, so 'H', 'O' and 'B' are accepted too; the digits are not changed.

        mov     edx, 16
        cmp     al, 'h'
        je      .postok

        mov     edx, 8
        cmp     al, 'o'
        je      .postok

        mov     edx, 2
        cmp     al, 'b'
        je      .postok

        mov     edx, 10                 ; no postfix - decimal number.
        inc     edi                     ; compensates the dec below.

.postok:
        dec     edi                     ; the postfix is not part of the number.

; here, edx contains the radix, esi - begin of the number; edi - end of the number.
.decode:
        xor     ebx, ebx

        cmp     esi, edi
        jae     .invalid_number         ; there is no digits at all.

.decode_loop:
        lodsb
        cmp     al, '0'
        jb      .invalid_number
        cmp     al, '9'
        jbe     .digit

        or      al, $20                 ; to lower case: 'A'..'F' are valid hex digits as well as 'a'..'f'.

        cmp     al, 'z'
        ja      .invalid_number
        cmp     al, 'a'
        jb      .invalid_number

        sub     al, 'a'-'0'-10          ; 'a' becomes '0'+10, so the common code below works.

.digit:
        sub     al, '0'
        movzx   eax, al
        cmp     eax, edx
        jae     .invalid_number         ; the digit is not valid in this radix.

        imul    ebx, edx
        add     ebx, eax

        cmp     esi, edi
        jne     .decode_loop

; set the sign:
        xor     ebx, [.sign]            ; [.sign] = -1 negates ebx; [.sign] = 0 does nothing.
        sub     ebx, [.sign]

        clc
        mov     eax, ebx
        pop     edi esi edx ebx
        return

.invalid_number:
        xor     eax, eax
        stc
        pop     edi esi edx ebx
        return
endp



;-------------------------------------------------------
; function StrCharCat
;   Appends up to 4 chars at the end of the string.
;
; Arguments:
;   hString - string to append
;   char    - char(s) to add
; Returns:
;   nothing
;
; [.char] is stored as one dword at the end of the string, so it can
; contain up to 4 bytes (for example one utf-8 character). The new length
; is determined by the first zero byte after the old end of the string.
;-------------------------------------------------------
proc StrCharCat, .hString, .char
begin
        push    eax ecx

        stdcall StrLen, [.hString]
        mov     ecx, eax                        ; ecx = the old length
        add     eax, 8                          ; 4 bytes for the chars and 4 for the zeros after them

        stdcall StrSetCapacity, [.hString], eax
        jnc     @f

        int3                                    ; not valid handle: stop in the debugger

@@:
        pushd   [.char]
        popd    [eax+ecx]
        mov     dword [eax+ecx+4], 0

; find the new end of the string.
        dec     ecx
.goend:
        inc     ecx
        cmp     byte [eax+ecx], 0
        jne     .goend

        mov     [eax+string.len], ecx
        pop     ecx eax
        return
endp



;------------------------------------------------------------
; function StrInsertChar
;   Inserts up to 4 chars into the given position of the string
;
; Arguments:
;   hString - string to append
;   char    - char to add
;   pos     - position where to add the char
; Returns: nothing.
;
; The chars are stored in temporary zero terminated buffer that is
; inserted by StrInsert and then released.
;-------------------------------------------------------------
proc StrCharInsert, .hString, .char, .pos
begin
        push    eax

        stdcall GetMem, 16
        pushd   [.char]
        popd    [eax]
        mov     dword [eax+4], 0                ; the terminator...
        mov     dword [eax+8], 0                ; ...and the zeros needed by the qword scan of StrLen.

        stdcall StrInsert, [.hString], eax, [.pos]
        stdcall FreeMem, eax

        pop     eax
        return
endp



;_______________________________________________________________________
; proc StrClipSpacesR
;   Removes the spaces from the right of the string.
; Arguments:
;   hString - string to be processed
; Returns:
;   CF=1 - invalid string handle.
;   CF=0 - the string is processed. All registers are preserved.
;_______________________________________________________________________
proc StrClipSpacesR, .hString
begin
        push    eax ecx

        stdcall StrPtr, [.hString]
        jc      .finish

        mov     ecx, [eax+string.len]
        jecxz   .exit

.loop:
        cmp     byte [eax+ecx-1], ' '
        jne     .exit
        dec     ecx
        jnz     .loop

.exit:
        mov     [eax+string.len], ecx
        mov     dword [eax+ecx], 0
        clc                                     ; the cmp above leaves CF=1 for chars below ' ' - it must not be reported as error.

.finish:
        pop     ecx eax
        return
endp



;_______________________________________________________________________
; proc StrClipSpacesL
;   Removes the spaces from the left of the string.
; Arguments:
;   hString - string to be processed
; Returns:
;   CF=1 - invalid string handle.
;_______________________________________________________________________
proc StrClipSpacesL, .hString
begin
        push    esi edi eax ecx

        stdcall StrPtr, [.hString]
        jc      .finish

        mov     ecx, [eax+string.len]
        mov     esi, eax                ; esi - scans for the first non-space char
        mov     edi, eax                ; edi - start of the string data

.loop:
        jecxz   .copy
        cmp     byte [esi], ' '
        jne     .copy
        inc     esi
        dec     ecx
        jmp     .loop

.copy:
        mov     [edi+string.len], ecx
        cmp     esi, edi
        je      .finish                 ; no leading spaces - nothing to move. (CF=0 here)
        jecxz   .terminate              ; the string contained only spaces.

        add     ecx, 4                  ; moves 4 more bytes after the text (presumably the zero terminator)
        rep movsb

.finish:
        pop     ecx eax edi esi
        return

; The string consisted only of spaces. The length is already 0, but the data
; must be terminated as well, otherwise the old spaces stay visible for the
; code that reads the data up to the zero terminator.
.terminate:
        mov     dword [edi], 0          ; "mov" does not change CF (it is 0 here)
        jmp     .finish
endp



;_______________________________________________________________________
; proc StrCleanDupSpaces
;   Removes duplicating spaces from the string.
; Arguments:
;   hString - string to be processed
; Returns:
;   CF=1 - invalid string handle.
;_______________________________________________________________________
proc StrCleanDupSpaces, .hString
begin
        push    esi edi eax ecx edx

        stdcall StrPtr, [.hString]
        jc      .finish

        mov     ecx, [eax+string.len]
        lea     edx, [eax+string.len]   ; edx - pointer to the length field; decremented on every removed space
        mov     esi, eax
        mov     edi, eax
        jecxz   .endcopy

.loop:
        lodsb
        cmp     al, ' '
        jne     .store
        cmp     byte [esi], ' '         ; a space followed by a space is dropped,
        jne     .store                  ; so only the last space of a run is kept.

; skip
        dec     dword [edx]
        jmp     .next

.store:
        stosb

.next:
        dec     ecx
        jnz     .loop

.endcopy:
        xor     eax, eax                ; also clears CF
        stosd

.finish:
        pop     edx ecx eax edi esi
        return
endp



;_______________________________________________________________________
;
; proc StrHash
;   Computes 32 bit hash value from the string.
;   This procedure implements the hash algorithm: FNV-1a
;   (every byte is first xor-ed to the hash and then the hash
;   is multiplied by the FNV prime - see DataHash)
;
; Arguments:
;   .hString - handle of string.
;
; Return:
;   eax - 32bit hash value.
;
; Changes:
;   eax
;_______________________________________________________________________
proc StrHash, .hString
begin
        stdcall StrLen, [.hString]
        push    eax                     ; second argument of DataHash: .len
        stdcall StrPtr, [.hString]
        push    eax                     ; first argument of DataHash: .ptrData
        call    DataHash
        return
endp


; Computes 32 bit FNV-1a hash of [.len] bytes, starting at [.ptrData].
;
; Returns:
;   eax - the hash value. For [.len] = 0 it is the FNV offset basis.
; Preserves ecx, edx and esi.
proc DataHash, .ptrData, .len
begin
        push    ecx edx esi

        mov     esi, [.ptrData]
        mov     ecx, [.len]
        mov     eax, $811C9DC5          ; 2166136261 ; FNV offset basis

        inc     ecx                     ; the counter is checked at the start of the loop.
.hashloop:
        dec     ecx
        jz      .exit

        movzx   edx, byte [esi]
        xor     eax, edx
        inc     esi
        imul    eax, $01000193          ; 16777619 ; FNV prime
        jmp     .hashloop

.exit:
        pop     esi edx ecx
        return
endp



; Creates new string that contains the URL encoded (%XX) content of the string [.hstr].
; Which of the characters below $80 are encoded is defined by the bit table
; .URLCharTable. The bytes $80 and above are copied without encoding.
;
; Returns:
;   eax - the new string. It is empty if the source string is empty.
proc StrURLEncode, .hstr
.res dd ?
begin
        push    ebx ecx edx esi edi

        stdcall StrPtr, [.hstr]
        mov     esi, eax

        stdcall StrLen, esi
        mov     ecx, eax
        lea     edx, [2*eax+eax]        ; the encoded string can be max 3x long as original string.

        stdcall StrNew
        mov     [.res], eax
        jecxz   .finish

        stdcall StrSetCapacity, eax, edx
        mov     edi, eax                ; eax is used here as pointer to the data of the new string.

        xor     edx, edx
        xor     ebx, ebx

        push    eax                     ; the start of the data, for computing the length at the end.

.encode:
        lodsb
        cmp     al, $80
        jae     .store                  ; it is a hack, but I hope safe enough.

        mov     dl, al
        mov     bl, al
        shr     edx, 5                  ; edx = index of the dword in the table
        and     ebx, $1f                ; ebx = number of the bit in this dword
        bt      dword [.URLCharTable+4*edx], ebx
        jnc     .store

        mov     ah, al                  ; keep the char in ah, while its hex digits are stored.

        mov     al, '%'
        stosb

        mov     al, ah
        shr     al, 4
        cmp     al, $0a                 ; these 3 instructions convert
        sbb     al, $69                 ; the value 0..15 in al to ASCII
        das                             ; hex digit '0'..'9','A'..'F'
        stosb

        mov     al, ah
        and     al, $0f
        cmp     al, $0a
        sbb     al, $69
        das

.store:
        stosb
        loop    .encode

        xor     al, al
        mov     [edi], al

        pop     eax
        sub     edi, eax
        mov     [eax+string.len], edi

.finish:
        mov     eax, [.res]
        pop     edi esi edx ecx ebx
        return

; Contains 1 where the character must be % encoded and 0 where it is safe to pass it directly
; The least significant bit of every byte corresponds to the lowest character code.

.URLCharTable db 11111111b ;
              db 11111111b ;
              db 11111111b ;
              db 11111111b ; 0..31 -control chars | encoded
              db 11111111b ; $27 - $20: '&%$#"!   | encoded
              db 11111111b ; $2f - $28: /.-,+*)(  | encoded
              db 00000000b ; $37 - $30: 76543210  | not encoded
              db 11111100b ; $3f - $38: ?>=<;:98  | partially
              db 00000001b ; $47 - $40: GFEDCBA@  | partially
              db 00000000b ; $4f - $48: ONMLKJIH  | not encoded
              db 00000000b ; $57 - $50: WVUTSRQP  | not encoded
              db 11111000b ; $5f - $58: _^]\[ZYX  | partially
              db 00000001b ; $67 - $60: gfedcba`  | partially
              db 00000000b ; $6f - $68: onmlkjih  | not encoded
              db 00000000b ; $77 - $70: wvutsrqp  | not encoded
              db 11111000b ; $7f - $78:  ~}|{zyx  | partially
endp



; Decodes URL encoded string in place: "+" is converted to space and "%XX" to the
; byte with hex code XX. The string is terminated and its length is updated.
; The hex digits are not validated.
; If the string ends in the middle of "%XX" sequence, the incomplete sequence
; is dropped.
;
; Preserves all registers.
proc StrURLDecode, .hstring
begin
        pushad

        stdcall StrLen, [.hstring]
        mov     ecx, eax                ; upper limit for the count of the stored bytes.
        jecxz   .finish

        stdcall StrPtr, [.hstring]
        mov     esi, eax
        mov     edi, eax
        mov     ebx, eax

.loop:
        lodsb
        test    al, al
        jz      .end_of_string

        cmp     al, '+'
        je      .space
        cmp     al, '%'
        jne     .store

        lodsb                           ; the high hex digit
        test    al, al
        jz      .end_of_string          ; don't read after the end of the string.
        cmp     al, '9'
        jbe     @f
        add     al, $09                 ; 'A'..'F' and 'a'..'f' get low nibble $A..$F
@@:
        shl     al, 4
        mov     ah, al

        lodsb                           ; the low hex digit
        test    al, al
        jz      .end_of_string          ; don't read after the end of the string.
        cmp     al, '9'
        jbe     @f
        add     al, $09
@@:
        and     al, $0f
        or      al, ah
        jmp     .store

.space:
        mov     al, ' '

.store:
        stosb
        loop    .loop

.end_of_string:
        mov     ecx, edi
        sub     ecx, ebx                ; the length of the decoded string.

        xor     eax, eax
        stosd

        mov     [ebx+string.len], ecx

.finish:
        popad
        return
endp



; UTF-8 support functions.
; Some of the above functions also need some revision in order to support
; utf-8 strings properly.


; Returns the count of the UTF-8 characters in the string [.hString].
; The scan stops at the first zero character or when the pointer reaches
; [.len] bytes after the start of the string. If [.len] = -1 only the zero
; character ends the scan.
;
; Returns:
;   CF=0; eax = count of the characters.
;   CF=1 if DecodeUtf8 returns CF=1 for some of the characters.
;
; Bug - on [.len]=-1 sometimes in Linux returns error on normal strings.
proc StrLenUtf8, .hString, .len
.maxptr dd ?
begin
        push    esi ecx edx

        stdcall StrPtr, [.hString]
        mov     esi, eax

        mov     eax, [.len]
        cmp     eax, -1
        je      @f                      ; -1 remains as the highest possible address.
        add     eax, esi
@@:
        mov     [.maxptr], eax

        xor     ecx, ecx

.loop:
        cmp     esi, [.maxptr]
        jae     .endofstring

        mov     eax, [esi]
        stdcall DecodeUtf8, eax
        jc      .error

        test    eax, eax
        jz      .endofstring

        add     esi, edx                ; edx is used as byte length of the decoded character.
        inc     ecx
        jmp     .loop

.endofstring:
        mov     eax, ecx
        pop     edx ecx esi
        clc
        return

.error:
        pop     edx ecx esi             ; CF=1 remains from DecodeUtf8
        return
endp



; Returns pointer to the UTF-8 character with index [.pos] in the string [.hString].
; If the string is shorter, the pointer to the zero terminator is returned.
;
; Returns:
;   CF=0; eax = pointer to the character.
;   CF=1; eax = 0 if DecodeUtf8 returns CF=1 for some of the characters.
proc StrOffsUtf8, .hString, .pos
begin
        push    edx esi

        stdcall StrPtr, [.hString]
        mov     esi, eax

.loop:
        dec     [.pos]
        js      .finish

        stdcall DecodeUtf8, [esi]
        jc      .error

        test    eax, eax
        jz      .finish

        add     esi, edx
        jmp     .loop

.finish:
        clc
        mov     eax, esi
        pop     esi edx
        return

.error:
        xor     eax, eax
        stc                             ; "xor" clears CF, so the error must be set again.
        pop     esi edx
        return
endp



; If esi points to a continuation byte (10xxxxxx) of UTF-8 character, moves esi
; forward to the first following byte that is not a continuation byte.
; If esi points to ASCII char or to a leading byte (11xxxxxx), esi is not changed.
;
; Arguments:
;   esi - pointer in the string.
; Returns:
;   esi - the adjusted pointer. eax is preserved.
proc ScanForwardUtf8
begin
        push    eax

        mov     al, [esi]
        test    al, al
        jns     .finish                 ; ASCII char

        and     al, 11000000b
        cmp     al, 11000000b
        je      .finish                 ; leading byte

; inc forward
.loopf:
        inc     esi
        mov     al, [esi]
        and     al, 11000000b
        cmp     al, 10000000b
        je      .loopf

.finish:
        pop     eax
        return
endp



; If esi points to a continuation byte (10xxxxxx) of UTF-8 character, moves esi
; back to the first previous byte that is not a continuation byte.
; If esi points to ASCII char or to a leading byte (11xxxxxx), esi is not changed.
;
; Arguments:
;   esi - pointer in the string.
; Returns:
;   esi - the adjusted pointer. eax is preserved.
proc ScanBackUtf8
begin
        push    eax

        mov     al, [esi]
        test    al, al
        jns     .finish                 ; ASCII char

        and     al, 11000000b
        cmp     al, 11000000b
        je      .finish                 ; leading byte

; inc back
.loopf:
        dec     esi
        mov     al, [esi]
        and     al, 11000000b
        cmp     al, 10000000b
        je      .loopf

.finish:
        pop     eax
        return
endp



; Replaces the tab characters in the string [.hstring] with spaces, so the next
; character starts at column that is multiple of [.tabstop].
;
; The work is made on two passes. The first pass replaces every tab with one space
; and pushes in the stack the pair (count of the additional spaces, byte offset) for
; every tab that needs more than one space. The second pass pops the pairs - i.e.
; from the last tab to the first - and inserts the spaces. This way the saved
; offsets remain valid during the inserting.
;
; Returns:
;   eax - the total count of the inserted spaces.
;
; Notes:
;   [.tabstop] must not be 0 (it is used as a divisor).
;   The result of DecodeUtf8 is not checked for errors.
proc ExpandTabs, .hstring, .tabstop
.start      dd ?
.count      dd ?
.correction dd ?
begin
        pushad

        mov     [.count], 0
        mov     [.correction], 0

        stdcall StrLen, [.hstring]
        mov     ecx, eax
        stdcall StrPtr, [.hstring]
        mov     [.start], eax
        mov     esi, eax
        add     ecx, eax                ; ecx - pointer to the end of the string.

        xor     ebx, ebx                ; ebx - the current column.

        jecxz   .end_scan               ; NOTE(review): ecx is already a pointer here, not the length.

.scan_loop:
        stdcall DecodeUtf8, [esi]
        cmp     eax, $09
        jne     .next

        mov     byte [esi], $20

        push    edx                     ; the length of the char; "cdq" and "div" change edx.
        mov     eax, ebx
        cdq
        div     [.tabstop]
        imul    eax, [.tabstop]         ; eax = the column rounded down to tab stop.
        pop     edx

        add     eax, [.tabstop]         ; the next tab stop
        sub     eax, ebx
        dec     eax                     ; one space is already there.
        jz      .next

        add     ebx, eax
        add     [.correction], eax

        push    eax                     ; space count
        mov     eax, esi
        sub     eax, [.start]
        push    eax                     ; offset

        inc     [.count]

.next:
        inc     ebx
        add     esi, edx
        cmp     esi, ecx
        jb      .scan_loop

.end_scan:
        cmp     [.count], 0
        je      .finish

.expand:
        pop     ebx                     ; offset
        pop     eax                     ; count

.ins_spc:
        stdcall StrCharInsert, [.hstring], ' ', ebx
        dec     eax
        jnz     .ins_spc

.next_tab:
        dec     [.count]
        jnz     .expand

.finish:
        popad
        mov     eax, [.correction]
        return
endp



; Converts string with IP address in dotted decimal format to number.
; The first number of the string goes to the most significant byte of the result.
;
; Returns:
;   CF=0; eax = the IP address.
;   CF=1; eax = the part of the address, converted before the error.
;
; Preserves ebx, edx and esi.
; NOTE(review): after StrToNum edx is used as offset of the first char after the
; number - confirm with the description of StrToNum.
proc StrIP2Num, .hString
begin
        push    ebx edx esi

        xor     ebx, ebx

        stdcall StrPtr, [.hString]
        mov     esi, eax

; string to IP
.iploop:
        stdcall StrToNum, esi
        cmp     eax, $100
        jae     .invalid_ip

        cmp     ebx, $1000000           ; no room for one more byte.
        jae     .invalid_ip

        shl     ebx, 8
        or      bl, al

        cmp     byte [esi+edx], 0
        je      .end_of_ip

        cmp     byte [esi+edx], '.'
        jne     .invalid_ip

        lea     esi, [esi+edx+1]
        jmp     .iploop

.invalid_ip:
        stc
        mov     eax, ebx
        pop     esi edx ebx             ; all 3 pushed registers must be restored, the same as in .end_of_ip
        return

.end_of_ip:
        clc
        mov     eax, ebx
        pop     esi edx ebx
        return
endp



; Creates string with the IP address [.ip] in dotted decimal format.
; The most significant byte of [.ip] is the first number in the string.
;
; Returns:
;   eax - the new string.
proc IP2Str, .ip
begin
        push    ebx

        movzx   eax, byte [.ip+3]
        stdcall NumToStr, eax, ntsDec or ntsUnsigned
        mov     ebx, eax                ; ebx - the result string.
        stdcall StrCharCat, ebx, '.'

        movzx   eax, byte [.ip+2]
        stdcall NumToStr, eax, ntsDec or ntsUnsigned
        stdcall StrCat, ebx, eax
        stdcall StrDel, eax             ; the temporary string with the number.
        stdcall StrCharCat, ebx, '.'

        movzx   eax, byte [.ip+1]
        stdcall NumToStr, eax, ntsDec or ntsUnsigned
        stdcall StrCat, ebx, eax
        stdcall StrDel, eax
        stdcall StrCharCat, ebx, '.'

        movzx   eax, byte [.ip]
        stdcall NumToStr, eax, ntsDec or ntsUnsigned
        stdcall StrCat, ebx, eax
        stdcall StrDel, eax

        mov     eax, ebx
        pop     ebx
        return
endp



; Creates new string with the content of [.hString], where the characters
; <, >, " and & are replaced with the HTML entities &lt; &gt; &quot; and &amp;
;
; Returns:
;   eax - the new string.
proc StrEncodeHTML, .hString
begin
        push    esi edi

        stdcall StrNew
        mov     edi, eax                ; edi - the result string.

        stdcall StrPtr, [.hString]
        mov     esi, eax

.loop:
        movzx   eax, byte [esi]
        inc     esi
        test    eax, eax
        jz      .end_of_string

        cmp     al, '<'
        je      .char_less_then
        cmp     al, '>'
        je      .char_greater_then
        cmp     al, '"'
        je      .char_quote
        cmp     al, '&'
        je      .char_amp
;        cmp     al, "'"
;        je      .char_apos

.store:
        stdcall StrCharCat, edi, eax    ; appends from 1 to 4 chars.
        jmp     .loop

.end_of_string:
        mov     eax, edi
        pop     edi esi
        return

.char_less_then:
        mov     eax, '&lt;'
        jmp     .store

.char_greater_then:
        mov     eax, '&gt;'
        jmp     .store

; The entities longer than 4 chars are appended on two parts.
.char_quote:
        stdcall StrCharCat, edi, '&quo'
        mov     eax, 't;'
        jmp     .store

;.char_apos:
;        stdcall StrCharCat, edi, '&apo'
;        mov     eax, 's;'
;        jmp     .store

.char_amp:
        stdcall StrCharCat, edi, '&amp'
        mov     eax, ';'
        jmp     .store
endp



; Decodes in place the HTML entities &nbsp; &lt; &gt; &quot; &apos; and &amp;
; in the string [.hString]. &nbsp; is decoded as normal space ($20).
; All other text, including the unknown entities, is copied unchanged.
; The string is terminated and its length is updated.
;
; Preserves all registers.
proc StrDecodeHTML, .hString
begin
        pushad

        stdcall StrPtr, [.hString]
        mov     esi, eax
        mov     edi, eax
        mov     ebx, eax

.loop:
        lodsb
        test    al, al
        jz      .end_of_string

        cmp     al, '&'
        je      .collapse

.store:
        stosb
        jmp     .loop

.collapse:
        cmp     dword [esi], 'nbsp'
        jne     .not_nbsp
        cmp     byte [esi+4], ';'
        jne     .not_nbsp

        add     esi, 5
        mov     al, ' '
        jmp     .store

.not_nbsp:
        mov     ecx, [esi]
        and     ecx, $ffffff            ; only the first 3 chars are compared.
        cmp     ecx, 'lt;'
        je      .lessthen
        cmp     ecx, 'gt;'
        jne     .not_gt

        add     esi, 3
        mov     al, '>'
        jmp     .store

.lessthen:
        add     esi, 3
        mov     al, '<'
        jmp     .store

.not_gt:
        cmp     dword [esi], 'quot'
        jne     .not_quote
        cmp     byte [esi+4], ';'
        jne     .not_quote

        add     esi, 5
        mov     al, '"'
        jmp     .store

.not_quote:
        cmp     dword [esi], 'apos'
        jne     .not_apos
        cmp     byte [esi+4], ';'
        jne     .not_apos

        add     esi, 5
        mov     al, "'"
        jmp     .store

.not_apos:
        cmp     dword [esi], 'amp;'
        jne     .store                  ; unknown entity - al still contains '&'

        add     esi, 4
        mov     al, '&'
        jmp     .store

.end_of_string:
        mov     dword [edi], 0
        sub     edi, ebx
        mov     [ebx+string.len], edi

        popad
        return
endp



; Creates string with the date and the time from the TDateTime structure
; [.pDateTime] in format "DD.MM.YYYY HH:MM:SS".
;
; Arguments:
;   .pDateTime - pointer to TDateTime structure.
;   .format    - not used in this implementation.
; Returns:
;   eax - the new string.
proc DateTimeToStr, .pDateTime, .format
begin
        push    ebx esi

        mov     esi, [.pDateTime]

; date
        stdcall NumToStr, [esi+TDateTime.date], ntsUnsigned or ntsFixedWidth or ntsDec + 2
        mov     ebx, eax                ; ebx - the result string.
        stdcall StrCharCat, ebx, '.'

        stdcall NumToStr, [esi+TDateTime.month], ntsUnsigned or ntsFixedWidth or ntsDec + 2
        stdcall StrCat, ebx, eax
        stdcall StrDel, eax
        stdcall StrCharCat, ebx, '.'

        stdcall NumToStr, [esi+TDateTime.year], ntsSigned or ntsFixedWidth or ntsDec + 4
        stdcall StrCat, ebx, eax
        stdcall StrDel, eax
        stdcall StrCharCat, ebx, ' '

; time
        stdcall NumToStr, [esi+TDateTime.hour], ntsUnsigned or ntsFixedWidth or ntsDec + 2
        stdcall StrCat, ebx, eax
        stdcall StrDel, eax
        stdcall StrCharCat, ebx, ':'

        stdcall NumToStr, [esi+TDateTime.minute], ntsUnsigned or ntsFixedWidth or ntsDec + 2
        stdcall StrCat, ebx, eax
        stdcall StrDel, eax
        stdcall StrCharCat, ebx, ':'

        stdcall NumToStr, [esi+TDateTime.second], ntsUnsigned or ntsFixedWidth or ntsDec + 2
        stdcall StrCat, ebx, eax
        stdcall StrDel, eax

        mov     eax, ebx
        pop     esi ebx
        return
endp



; Extracts the part of the string [.hFilename] after the last "/" or "\" character.
; If there is no such character, the whole string is extracted.
;
; Returns:
;   eax - the result of StrExtract for the found part (presumably new string).
proc StrExtractFilename, .hFilename
begin
        push    ebx ecx esi

        stdcall StrLen, [.hFilename]
        mov     ecx, eax
        stdcall StrPtr, [.hFilename]
        lea     esi, [eax+ecx]          ; esi - scans back from the end of the string.
        mov     ebx, eax                ; ebx - the start of the string.

.loop:
        cmp     esi, ebx
        je      .found2

        dec     esi
        mov     al, [esi]
        cmp     al, '/'
        je      .found
        cmp     al, '\'
        je      .found
        jmp     .loop

.found:
        inc     esi                     ; skip the separator.

.found2:
        stdcall StrExtract, esi, 0, $7fffffff

        pop     esi ecx ebx
        return
endp



; Not implemented yet. Returns immediately without changing anything.
proc StrChangeExt, .hFilename, .hNewExt
begin
        return
endp



; Normalizes some path to the minimal possible path. If the path contains ".." removes the
; previous directory and ".." directory if possible. If not possible converts until it is
; possible and returns CF=1
;
; Arguments:
;   .hPath - handle of string with the path. As long as the string will be changed, it must
;            be a handle or static string but in the same format. At least the previous
;            dword should contain the length of the string.
;   .separators - the first two bytes of this argument specify two possible directory
;                 separators. For example "/\" will accept both slashes as valid separators.
;                 All separators in the result string will be converted to the first of them.
;                 These two characters should be equal if only one separator is to be used.
; Returns:
;   CF=0 if the path was normalized without errors.
;   CF=1 if the path can not be normalized to not contain ".." directory.
proc StrNormalizePath, .hPath, .separators
begin
        pushad

        stdcall StrPtr, [.hPath]
        mov     edx, eax                ; edx - the start of the string.

; Every removed ".." directory restarts the scan from the beginning of the path.
.outer:
        mov     esi, edx
        mov     edi, edx                ; edi - the start of the previous directory name.

        mov     eax, [.separators]      ; al - the first separator; ah - the second.

        cmp     word [esi], '..'
        jne     .loop

        add     esi, 2
        cmp     [esi], al
        je      .error                  ; the path starts with "../" - there is nothing to remove.
        cmp     [esi], ah
        je      .error

.loop:
        cmp     [esi], al
        je      .found
        cmp     [esi], ah
        je      .found
        cmp     byte [esi], 0
        je      .end_ok
        inc     esi
        jmp     .loop

.found:
        mov     [esi], al               ; converts the separator to the first one.
        inc     esi

        cmp     word [esi], '..'
        jne     .cont

; The ".." must be followed by a separator. Both separators are checked on
; the same position - right after the two dots.
        cmp     byte [esi+2], al
        je      .back
        cmp     byte [esi+2], ah
        je      .back

.cont:
        mov     edi, esi
        inc     esi
        jmp     .loop

.back:
        cmp     [edi], al
        je      .inc
        cmp     [edi], ah
        jne     .do_copy

.inc:
        inc     edi

.do_copy:
        cmp     edi, esi
        jz      .error                  ; there is no previous directory to be removed.

        add     esi, 3                  ; skips ".." and the separator after it.

.copy:
        lodsb
        stosb
        test    al, al
        jnz     .copy

        dec     edi                     ; edi - points to the zero terminator.
        xor     eax, eax
        mov     [edi], eax
        sub     edi, edx
        mov     [edx+string.len], edi
        jmp     .outer

.error:
        stc
        popad
        return

.end_ok:
        clc

.finish:
;        pushf
;        mov     edi, esi
;        sub     esi, edx
;        xor     eax, eax
;        stosd
;        mov     [edx+string.len], esi
;        popf
        popad
        return
endp



;******************************************************
; Computes MD5 hash of the string .hString and returns
; new string handle in eax containing the hash of the
; string.
;******************************************************
proc StrMD5, .hString
begin
        stdcall StrLen, [.hString]
        push    eax                     ; the second argument of DataMD5

        stdcall StrPtr, [.hString]
        stdcall DataMD5, eax            ; length from the stack.
        return
endp



; If the string [.hString] begins and ends with the same quote character
; (" or '), removes these two characters. In all other cases the string
; is not changed.
;
; Preserves all registers.
proc StrRemoveQuotes, .hString
begin
        pushad

        stdcall StrPtr, [.hString]
        mov     esi, eax

        stdcall StrLen, [.hString]
        mov     ecx, eax
        jecxz   .finish

        dec     ecx                     ; ecx - offset of the last char.
        jz      .finish                 ; one char only - it can not be both the opening and the closing quote.

        mov     al, [esi]
        cmp     al, '"'
        je      .quote
        cmp     al, "'"
        je      .quote

.finish:
        popad
        return

.quote:
        cmp     [esi+ecx], al
        jne     .finish

        dec     ecx                     ; the length without the two quotes.
        stdcall StrCopyPart, [.hString], [.hString], 1, ecx
        jmp     .finish
endp



; Splits the string .hString on multiple items, separated by [.Separator] char.
;
; Arguments:
;   .hString - handle or pointer to the string that have to be split.
;   .Separator - contains UNICODE separator character.
;   .fAllowEmpty - if TRUE, empty strings are allowed. if FALSE, the empty strings are not
;                  included in the list.
;
; Returns:
;   EAX: TArray of dword containing the parts of the string.
;   If the string does not contain any items, returns empty TArray
;
; The returned array should be freed when not needed with:
;        stdcall ListFree, [RetPtr], StrDel
; where [RetPtr] is the pointer, returned by StrSplitList
;
; Notes:
;   The separators inside quoted text ("..." or '...') do not split the string.
;   The spaces on both sides of every item are removed.

proc StrSplitList, .hString, .Separator, .fAllowEmpty
begin
        pushad

        stdcall CreateArray, 4
        mov     edi, eax                ; edi - the result array.

        stdcall StrPtr, [.hString]
        jc      .finish

        mov     esi, eax

.outer:
        mov     ebx, esi                ; ebx - the start of the current item.
        xor     ecx, ecx                ; ecx - the open quote character or 0 if not in quotes.

.loop:
        stdcall DecodeUtf8, [esi]
        jc      .finish

        add     esi, edx                ; edx is used as byte length of the decoded character.

        test    eax, eax
        jz      .split_here

        test    ecx, ecx
        jz      .not_in_quote

        cmp     eax, ecx
        jne     .loop

        xor     ecx, ecx                ; the closing quote.
        jmp     .loop

.not_in_quote:
        cmp     eax, '"'
        je      .quote_start
        cmp     eax, "'"
        jne     .continue

.quote_start:
        mov     ecx, eax
        jmp     .loop

.continue:
        cmp     eax, [.Separator]
        jne     .loop

.split_here:
        push    eax                     ; the last char: 0 at the end of the string.

; The separator can be encoded in more than one byte, so the length of the
; item is computed by subtracting its real length, still kept in edx.
        mov     eax, esi
        sub     eax, ebx
        sub     eax, edx

        stdcall StrExtract, ebx, 0, eax
        mov     ebx, eax                ; ebx - the new item.

        stdcall StrClipSpacesR, ebx
        stdcall StrClipSpacesL, ebx

        cmp     [.fAllowEmpty], 0
        jne     .add_it

        stdcall StrLen, ebx
        test    eax, eax
        jnz     .add_it

; The empty item will not be added to the array, so it must be freed here.
; NOTE(review): relies on StrDel preserving edi - confirm.
        stdcall StrDel, ebx
        jmp     .next

.add_it:
        stdcall AddArrayItems, edi, 1
        mov     edi, edx                ; edx is used as the new pointer to the array.
        mov     [eax], ebx              ; eax is used as pointer to the new item.

.next:
        pop     eax
        test    eax, eax
        jnz     .outer

.finish:
        mov     [esp+4*regEAX], edi     ; returns the array in eax after "popad"
        popad
        return
endp



include 'encodings.asm'
include '%TargetOS%/utf8.asm'

endmodule