UTF8

#

type t = string

UTF-8 encoded Unicode strings. The type is the ordinary string type.

#

exception Malformed_code

#

val validate : t -> unit

validate s succeeds if s is valid UTF-8, otherwise it raises Malformed_code. Other functions assume strings are valid UTF-8, so it is prudent to test the validity of strings from untrusted origins.

#

val get : t -> int -> UChar.t

get s n returns the n-th Unicode character of s. The call takes O(n) time.

#

val init : int -> (int -> UChar.t) -> t

init len f returns a new string which contains len Unicode characters. The i-th Unicode character is initialized by f i

#

val length : t -> int

length s returns the number of Unicode characters contained in s

#

type index = int

Positions in the string represented by the number of bytes from the head. The location of the first character is 0

#

val nth : t -> int -> index

nth s n returns the position of the n-th Unicode character. The call takes O(n) time.

#

val first : t -> index

The position of the head of the first Unicode character.

#

val last : t -> index

The position of the head of the last Unicode character.

#

val look : t -> index -> UChar.t

look s i returns the Unicode character at the location i in the string s.

#

val out_of_range : t -> index -> bool

out_of_range s i tests whether i is a position outside of s.

#

val compare_index : t -> index -> index -> int

compare_index s i1 i2 returns a value < 0 if i1 is located before i2, 0 if i1 and i2 point to the same location, and a value > 0 if i1 is located after i2.

#

val next : t -> index -> index

next s i returns the position of the head of the Unicode character located immediately after i. If i is inside of s, the function always succeeds. If i is inside of s and there is no Unicode character after i, a position outside s is returned. If i is not inside of s, the behaviour is unspecified.

#

val prev : t -> index -> index

prev s i returns the position of the head of the Unicode character located immediately before i. If i is inside of s, the function always succeeds. If i is inside of s and there is no Unicode character before i, a position outside s is returned. If i is not inside of s, the behaviour is unspecified.

#

val move : t -> index -> int -> index

move s i n returns the position of the n-th Unicode character after i if n >= 0, or of the n-th Unicode character before i if n < 0. If there is no such character, the result is unspecified.

#

val iter : (UChar.t -> unit) -> t -> unit

iter f s applies f to all Unicode characters in s. The order of application is the same as the order of the Unicode characters in s.

#

val compare : t -> t -> int

Code point comparison by the lexicographic order. compare s1 s2 returns a positive integer if s1 > s2, 0 if s1 = s2, a negative integer if s1 < s2.

#

module Buf : sig

Buffer module for UTF-8 strings

#

type buf

Buffers for UTF-8 strings.

#

val create : int -> buf

create n creates a buffer with an initial size of n bytes.

#

val contents : buf -> t

contents buf returns the contents of the buffer.

#

val clear : buf -> unit

Empty the buffer, but retain the internal storage that was holding the contents.

#

val reset : buf -> unit

Empty the buffer and de-allocate the internal storage.

#

val add_char : buf -> UChar.t -> unit

Add one Unicode character to the buffer.

#

val add_string : buf -> t -> unit

Add the UTF-8 string to the buffer.

#

val add_buffer : buf -> buf -> unit

add_buffer b1 b2 adds the contents of b2 to b1. The contents of b2 are not changed.

end

with type buf = Buffer.t

module UTF8