Up

module Core_string

: sig

An extension of the standard StringLabels. If you open Core.Std, you'll get these in the String module.

#
type t = string
#
val typerep_of_t : t Typerep_lib.Std.Typerep.t
#
val typename_of_t : t Typerep_lib.Std.Typename.t
#
module Caseless : sig

Caseless compares and hashes strings ignoring case, so that for example Caseless.equal "OCaml" "ocaml" and Caseless.("apple" < "Banana") are true, and Caseless.Map and Caseless.Table lookup and Caseless.Set membership are case-insensitive.

include Comparable.S_binable with type t := t
include Hashable.S_binable with type t := t
end
include Blit.S with type t := t
include Container.S0 with type t := t and type elt = char
include Identifiable.S with type t := t
#
val max_length : int

Maximum length of a string.

#
external length : t -> int = "%string_length"
#
external get : t -> int -> char = "%string_safe_get"
#
external set : t -> int -> char -> unit = "%string_safe_set"
#
external create : int -> t = "caml_create_string"
#
val make : int -> char -> t
#
val copy : t -> t
#
val init : int -> f:(int -> char) -> t
#
val fill : t -> pos:int -> len:int -> char -> unit
#
val concat : ?sep:t -> t list -> t

Concatenates all strings in the list using separator sep (default sep is "").

#
val escaped : t -> t

Warning: Only returns a copy if changes are necessary! Special characters are represented by escape sequences, following the lexical conventions of Objective Caml.

#
val contains : ?pos:int -> ?len:int -> t -> char -> bool
#
val uppercase : t -> t
#
val lowercase : t -> t
#
val capitalize : t -> t
#
val uncapitalize : t -> t
#
val index : t -> char -> int option
#
val index_exn : t -> char -> int
#
val rindex : t -> char -> int option
#
val rindex_exn : t -> char -> int
#
val index_from : t -> int -> char -> int option
#
val index_from_exn : t -> int -> char -> int
#
val rindex_from : t -> int -> char -> int option
#
val rindex_from_exn : t -> int -> char -> int
#
module Search_pattern : sig

Substring search and replace functions. They use the Knuth-Morris-Pratt algorithm (KMP) under the hood.

The functions in the Search_pattern module allow the program to preprocess the searched pattern once and then use it many times without further allocations.

#
type t
#
val create : string -> t

create pattern preprocesses pattern as per KMP, building an int array of length length pattern. All inputs are valid.

#
val index : ?pos:int -> t -> in_:string -> int option

pos < 0 or pos >= length string result in no match (hence index returns None and index_exn raises).

#
val index_exn : ?pos:int -> t -> in_:string -> int
#
val index_all : t -> may_overlap:bool -> in_:string -> int list
#
val replace_first : ?pos:int -> t -> in_:string -> with_:string -> string

Note that the result of replace_all pattern ~in_:text ~with_:r may still contain pattern, e.g.

        replace_all (create "bc") ~in_:"aabbcc" ~with_:"cb" = "aabcbc"
#
val replace_all : t -> in_:string -> with_:string -> string
#
val sexp_of_t : t -> Sexplib.Sexp.t
end
#
val substr_index : ?pos:int -> t -> pattern:t -> int option

Substring search and replace convenience functions. They call Search_pattern.create and then forget the preprocessed pattern when the search is complete. pos < 0 or pos >= length t result in no match (hence substr_index returns None and substr_index_exn raises). may_overlap indicates whether to report overlapping matches, see Search_pattern.index_all.

#
val substr_index_exn : ?pos:int -> t -> pattern:t -> int
#
val substr_index_all : t -> may_overlap:bool -> pattern:t -> int list
#
val substr_replace_first : ?pos:int -> t -> pattern:t -> with_:t -> t

As with Search_pattern.replace_all, the result may still contain pattern.

#
val substr_replace_all : t -> pattern:t -> with_:t -> t
#
val slice : t -> int -> int -> t

slice s start stop gets a slice of s between start and stop. start and stop will be normalized before the access. (viz. Core_array.normalize).

#
val to_list_rev : t -> char list

Returns the reversed list of characters contained in the string.

#
val rev : t -> t

rev t returns t in reverse order.

#
val nget : t -> int -> char

nget s i Gets the char at normalized position i in s.

#
val nset : t -> int -> char -> unit

nset s i c Sets the char at normalized position i to c.

#
val is_suffix : t -> suffix:t -> bool

is_suffix s ~suffix returns true if s ends with suffix.

#
val is_prefix : t -> prefix:t -> bool

is_prefix s ~prefix returns true if s starts with prefix.

#
val lsplit2_exn : t -> on:char -> t * t

If the string s contains the character on, then lsplit2_exn s ~on returns a pair containing s split around the first appearance of on (from the left).

Raises Not_found When on cannot be found in s
#
val rsplit2_exn : t -> on:char -> t * t

If the string s contains the character on, then rsplit2_exn s ~on returns a pair containing s split around the first appearance of on (from the right).

Raises Not_found When on cannot be found in s
#
val lsplit2 : t -> on:char -> (t * t) option

lsplit2 line ~on optionally returns line split into two strings around the first appearance of on from the left

#
val rsplit2 : t -> on:char -> (t * t) option

rsplit2 line ~on optionally returns line split into two strings around the first appearance of on from the right

#
val split : t -> on:char -> t list

split s ~on

Returns a list of substrings of s that are separated by on. Consecutive on characters will cause multiple empty strings in the result. Splitting the empty string returns a list of the empty string, not the empty list.
#
val split_on_chars : t -> on:char list -> t list

split_on_chars s ~on

Returns a list of all substrings of s that are separated by one of the chars from on. on are not grouped. So a grouping of on in the source string will produce multiple empty string splits in the result.
#
val split_lines : t -> t list

split_lines t returns the list of lines that comprise t. The lines do not include the trailing "\n" or "\r\n".

#
val lfindi : ?pos:int -> t -> f:(int -> char -> bool) -> int option

lfindi ?pos t ~f returns the smallest i >= pos such that f i t.[i], if there is such an i. By default, pos = 0.

#
val rfindi : ?pos:int -> t -> f:(int -> char -> bool) -> int option

rfindi ?pos t ~f returns the largest i <= pos such that f i t.[i], if there is such an i. By default pos = length t - 1.

#
val lstrip : ?drop:(char -> bool) -> t -> t

lstrip ?drop s returns a string with consecutive chars satisfying drop (by default white space, e.g. tabs, spaces, newlines, and carriage returns) stripped from the beginning of s.

#
val rstrip : ?drop:(char -> bool) -> t -> t

rstrip ?drop s returns a string with consecutive chars satisfying drop (by default white space, e.g. tabs, spaces, newlines, and carriage returns) stripped from the end of s.

#
val strip : ?drop:(char -> bool) -> t -> t

strip ?drop s returns a string with consecutive chars satisfying drop (by default white space, e.g. tabs, spaces, newlines, and carriage returns) stripped from the beginning and end of s.

#
val map : t -> f:(char -> char) -> t

map s ~f applies f to each character in s, and returns the resulting string.

#
val mapi : t -> f:(int -> char -> char) -> t

mapi s ~f applies f to each character in s and its index, and returns the resulting string.

#
val foldi : t -> init:'a -> f:(int -> 'a -> char -> 'a) -> 'a

foldi works similarly to fold, but also passes the index of each character to f.

#
val concat_map : ?sep:t -> t -> f:(char -> t) -> t

Like map, but allows replacement of a single character with zero or two or more characters.

#
val filter : t -> f:(char -> bool) -> t

filter s ~f:predicate discards characters not satisfying predicate

#
val tr : target:char -> replacement:char -> t -> t

tr target replacement s replaces every instance of target in s with replacement.

#
val tr_inplace : target:char -> replacement:char -> t -> unit

tr_inplace target replacement s destructively modifies s (in place!) replacing every instance of target in s with replacement.

#
val chop_suffix_exn : t -> suffix:t -> t

chop_suffix_exn s ~suffix returns a copy of s without the trailing suffix.

Raises Invalid_argument if suffix is not a suffix of s
#
val chop_prefix_exn : t -> prefix:t -> t

chop_prefix_exn s ~prefix returns a copy of s without the leading prefix.

Raises Invalid_argument if prefix is not a prefix of s
#
val chop_suffix : t -> suffix:t -> t option
#
val chop_prefix : t -> prefix:t -> t option
#
val suffix : t -> int -> t

suffix s n returns the longest suffix of s of length less than or equal to n

#
val prefix : t -> int -> t

prefix s n returns the longest prefix of s of length less than or equal to n

#
val drop_suffix : t -> int -> t

drop_suffix s n drops the longest suffix of s of length less than or equal to n

#
val drop_prefix : t -> int -> t

drop_prefix s n drops the longest prefix of s of length less than or equal to n

#
val concat_array : ?sep:t -> t array -> t

concat_array sep ar like String.concat, but operates on arrays

#
external hash : t -> int = "caml_hash_string" "noalloc"

slightly faster hash function on strings

#
val equal : t -> t -> bool

fast equality function on strings, doesn't use compare_val

#
val is_empty : t -> bool

is_empty s returns true iff s is empty (i.e. its length is 0).

#
module Infix : sig
#
val (</>) : t -> int * int -> t
end
#
val of_char : char -> t
#
val of_char_list : char list -> t
#
module Escaping : sig

Operations for escaping and unescaping strings, with parameterized escape and escapeworthy characters. Escaping/unescaping using this module is more efficient than using Pcre. Benchmark code can be found in core/benchmarks/string_escaping.ml.

#
val escape_gen_exn : escapeworthy_map:(char * char) list -> escape_char:char -> (string -> string) Staged.t

escape_gen_exn escapeworthy_map escape_char returns a function that will escape a string s as follows: if (c1,c2) is in escapeworthy_map, then all occurrences of c1 are replaced by escape_char concatenated to c2.

Raises an exception if escapeworthy_map is not one-to-one. If escape_char is not in escapeworthy_map, then it will be escaped to itself.

#
val escape_gen : escapeworthy_map:(char * char) list -> escape_char:char -> (string -> string) Or_error.t
#
val escape : escapeworthy:char list -> escape_char:char -> (string -> string) Staged.t

escape ~escapeworthy ~escape_char s is

        escape_gen_exn ~escapeworthy_map:(List.zip_exn escapeworthy escapeworthy)
          ~escape_char

. Duplicates and escape_char will be removed from escapeworthy. So, no exception will be raised

#
val unescape_gen_exn : escapeworthy_map:(char * char) list -> escape_char:char -> (string -> string) Staged.t

unescape_gen_exn is the inverse operation of escape_gen_exn. That is,

      let escape = Staged.unstage (escape_gen_exn ~escapeworthy_map ~escape_char) in
      let unescape = Staged.unstage (unescape_gen_exn ~escapeworthy_map ~escape_char) in
      assert (s = unescape (escape s))

always succeeds when ~escapeworthy_map does not cause an exception.

#
val unescape_gen : escapeworthy_map:(char * char) list -> escape_char:char -> (string -> string) Or_error.t
#
val unescape : escape_char:char -> (string -> string) Staged.t

unescape ~escape_char is defined as unescape_gen_exn ~escapeworthy_map:[] ~escape_char

#
val is_char_escaping : string -> escape_char:char -> int -> bool

Any char in an escaped string is either escaping, escaped or literal. For example, for escaped string "0_a0__0" with escape_char as '_', pos 1 and 4 are escaping, 2 and 5 are escaped, and the rest are literal

is_char_escaping s ~escape_char pos returns true if the char at pos is escaping, false otherwise.

#
val is_char_escaped : string -> escape_char:char -> int -> bool

is_char_escaped s ~escape_char pos returns true if the char at pos is escaped, false otherwise.

#
val is_char_literal : string -> escape_char:char -> int -> bool

is_char_literal s ~escape_char pos returns true if the char at pos is neither escaped nor escaping.

#
val index : string -> escape_char:char -> char -> int option

index s ~escape_char char find the first literal (not escaped) instance of char in s starting from 0.

#
val index_exn : string -> escape_char:char -> char -> int
#
val rindex : string -> escape_char:char -> char -> int option

rindex s ~escape_char char find the first literal (not escaped) instance of char in s starting from the end of s and proceeding towards 0.

#
val rindex_exn : string -> escape_char:char -> char -> int
#
val index_from : string -> escape_char:char -> int -> char -> int option

index_from s ~escape_char pos char find the first literal (not escaped) instance of char in s starting from pos and proceeding towards the end of s.

#
val index_from_exn : string -> escape_char:char -> int -> char -> int
#
val rindex_from : string -> escape_char:char -> int -> char -> int option

rindex_from s ~escape_char pos char find the first literal (not escaped) instance of char in s starting from pos and towards 0.

#
val rindex_from_exn : string -> escape_char:char -> int -> char -> int
#
val split : string -> on:char -> escape_char:char -> string list

split s ~escape_char ~on

Returns a list of substrings of s that are separated by literal versions of on. Consecutive on characters will cause multiple empty strings in the result. Splitting the empty string returns a list of the empty string, not the empty list.
e.g. split ~escape_char:'_' ~on:',' "foo,bar_,baz" = ["foo"; "bar_,baz"]
#
val split_on_chars : string -> on:char list -> escape_char:char -> string list

split_on_chars s ~on

Returns a list of all substrings of s that are separated by one of the literal chars from on. on are not grouped. So a grouping of on in the source string will produce multiple empty string splits in the result.
e.g. split_on_chars ~escape_char:'_' ~on:[',';'|'] "foo_|bar,baz|0" = ["foo_|bar"; "baz"; "0"]
#
val lsplit2 : string -> on:char -> escape_char:char -> (string * string) option
#
val lsplit2_exn : string -> on:char -> escape_char:char -> string * string
#
val rsplit2 : string -> on:char -> escape_char:char -> (string * string) option
#
val rsplit2_exn : string -> on:char -> escape_char:char -> string * string
end
#
external unsafe_get : string -> int -> char = "%string_unsafe_get"
#
external unsafe_set : string -> int -> char -> unit = "%string_unsafe_set"
#
val t_of_sexp : Sexplib.Sexp.t -> t
#
val sexp_of_t : t -> Sexplib.Sexp.t
#
val bin_t : t Bin_prot.Type_class.t
#
val bin_read_t : t Bin_prot.Read.reader
#
val __bin_read_t__ : (int -> t) Bin_prot.Read.reader
#
val bin_reader_t : t Bin_prot.Type_class.reader
#
val bin_size_t : t Bin_prot.Size.sizer
#
val bin_write_t : t Bin_prot.Write.writer
#
val bin_writer_t : t Bin_prot.Type_class.writer
end