diff options
Diffstat (limited to 'theories/Strings/String.v')
-rw-r--r-- | theories/Strings/String.v | 392 |
1 files changed, 392 insertions, 0 deletions
diff --git a/theories/Strings/String.v b/theories/Strings/String.v new file mode 100644 index 00000000..f2c58364 --- /dev/null +++ b/theories/Strings/String.v @@ -0,0 +1,392 @@ +(************************************************************************) +(* v * The Coq Proof Assistant / The Coq Development Team *) +(* <O___,, * CNRS-Ecole Polytechnique-INRIA Futurs-Universite Paris Sud *) +(* \VV/ **************************************************************) +(* // * This file is distributed under the terms of the *) +(* * GNU Lesser General Public License Version 2.1 *) +(************************************************************************) + +(* $Id: String.v 8026 2006-02-11 19:40:49Z herbelin $ *) + +(** Contributed by Laurent Théry (INRIA); + Adapted to Coq V8 by the Coq Development Team *) + +Require Import Arith. +Require Import Ascii. + +(** *** Definition of strings *) + +(** A string is implemented as a list of ascii characters: [EmptyString] is the empty string and [String c s] puts the character [c] in front of the string [s] *) + +Inductive string : Set := + | EmptyString : string + | String : ascii -> string -> string. + +Delimit Scope string_scope with string. +Bind Scope string_scope with string. +Open Local Scope string_scope. + +(** Equality on strings is decidable; characters are compared with [ascii_dec] *) + +Definition string_dec : forall s1 s2 : string, {s1 = s2} + {s1 <> s2}. + decide equality; apply ascii_dec. +Defined. + +(** *** Concatenation of strings: [s1 ++ s2] is [s1] followed by [s2], defined by structural recursion on [s1] *) + +Reserved Notation "x ++ y" (right associativity, at level 60). + +Fixpoint append (s1 s2 : string) {struct s1} : string := + match s1 with + | EmptyString => s2 + | String c s1' => String c (s1' ++ s2) + end + +where "s1 ++ s2" := (append s1 s2) : string_scope. + +(******************************) +(** Length: [length s] is the number of characters of [s] *) +(******************************) + +Fixpoint length (s : string) : nat := + match s with + | EmptyString => 0 + | String c s' => S (length s') + end. 
+ +(******************************) +(** Nth character of a string: [get n s] is [Some c] when [c] is the character at position [n] of [s] (positions start at [0]), and [None] when [s] has no position [n] *) +(******************************) + +Fixpoint get (n : nat) (s : string) {struct s} : option ascii := + match s with + | EmptyString => None + | String c s' => match n with + | O => Some c + | S n' => get n' s' + end + end. + +(** Two strings that agree through [get] at every position are equal, and conversely *) + +Theorem get_correct : + forall s1 s2 : string, (forall n : nat, get n s1 = get n s2) <-> s1 = s2. +Proof. +intros s1; elim s1; simpl in |- *. +intros s2; case s2; simpl in |- *; split; auto. +intros H; generalize (H 0); intros H1; inversion H1. +intros; discriminate. +intros a s1' Rec s2; case s2; simpl in |- *; split; auto. +intros H; generalize (H 0); intros H1; inversion H1. +intros; discriminate. +intros H; generalize (H 0); simpl in |- *; intros H1; inversion H1. +case (Rec s). +intros H0; rewrite H0; auto. +intros n; exact (H (S n)). +intros H; injection H; intros H1 H2 n; case n; auto. +rewrite H2; trivial. +rewrite H1; auto. +Qed. + +(** The first [length s1] characters of [s1 ++ s2] are the ones of [s1] *) + +Theorem append_correct1 : + forall (s1 s2 : string) (n : nat), + n < length s1 -> get n s1 = get n (s1 ++ s2). +Proof. +intros s1; elim s1; simpl in |- *; auto. +intros s2 n H; inversion H. +intros a s1' Rec s2 n; case n; simpl in |- *; auto. +intros n0 H; apply Rec; auto. +apply lt_S_n; auto. +Qed. + +(** The characters of [s1 ++ s2] from position [length s1] onwards are the ones of [s2] *) + +Theorem append_correct2 : + forall (s1 s2 : string) (n : nat), + get n s2 = get (n + length s1) (s1 ++ s2). +Proof. +intros s1; elim s1; simpl in |- *; auto. +intros s2 n; rewrite plus_comm; simpl in |- *; auto. +intros a s1' Rec s2 n; case n; simpl in |- *; auto. +generalize (Rec s2 0); simpl in |- *; auto. +intros n0; rewrite <- Plus.plus_Snm_nSm; auto. +Qed. 
+ +(** *** Substrings *) + +(** [substring n m s] returns the substring of [s] that starts + at position [n] and has length [m]; if [s] ends before [m] characters have been taken the result is cut short, + and if position [n] is beyond the end of [s] it returns [""] *) + +Fixpoint substring (n m : nat) (s : string) {struct s} : string := + match n, m, s with + | 0, 0, _ => EmptyString + | 0, S m', EmptyString => s + | 0, S m', String c s' => String c (substring 0 m' s') + | S n', _, EmptyString => s + | S n', _, String c s' => substring n' m s' + end. + +(** The substring is included in the initial string: for [p < m], its character at position [p] is the character of [s] at position [p + n] *) + +Theorem substring_correct1 : + forall (s : string) (n m p : nat), + p < m -> get p (substring n m s) = get (p + n) s. +Proof. +intros s; elim s; simpl in |- *; auto. +intros n; case n; simpl in |- *; auto. +intros m; case m; simpl in |- *; auto. +intros a s' Rec; intros n; case n; simpl in |- *; auto. +intros m; case m; simpl in |- *; auto. +intros p H; inversion H. +intros m' p; case p; simpl in |- *; auto. +intros n0 H; apply Rec; simpl in |- *; auto. +apply Lt.lt_S_n; auto. +intros n' m p H; rewrite <- Plus.plus_Snm_nSm; simpl in |- *; auto. +Qed. + +(** The substring has at most [m] characters: it has no character at any position [p] with [m <= p] *) + +Theorem substring_correct2 : + forall (s : string) (n m p : nat), m <= p -> get p (substring n m s) = None. +Proof. +intros s; elim s; simpl in |- *; auto. +intros n; case n; simpl in |- *; auto. +intros m; case m; simpl in |- *; auto. +intros a s' Rec; intros n; case n; simpl in |- *; auto. +intros m; case m; simpl in |- *; auto. +intros m' p; case p; simpl in |- *; auto. +intros H; inversion H. +intros n0 H; apply Rec; simpl in |- *; auto. +apply Le.le_S_n; auto. +Qed. + +(** *** Test functions *) + +(** Test if [s1] is a prefix of [s2]; the empty string is a prefix of every string, and characters are compared with [ascii_dec] *) + +Fixpoint prefix (s1 s2 : string) {struct s2} : bool := + match s1 with + | EmptyString => true + | String a s1' => + match s2 with + | EmptyString => false + | String b s2' => + match ascii_dec a b with + | left _ => prefix s1' s2' + | right _ => false + end + end + end. 
+ +(** [s1] is a prefix of [s2] if and only if it is the [substring] of length + [length s1] starting at position [O] of [s2] *) + +Theorem prefix_correct : + forall s1 s2 : string, + prefix s1 s2 = true <-> substring 0 (length s1) s2 = s1. +Proof. +intros s1; elim s1; simpl in |- *; auto. +intros s2; case s2; simpl in |- *; split; auto. +intros a s1' Rec s2; case s2; simpl in |- *; auto. +split; intros; discriminate. +intros b s2'; case (ascii_dec a b); simpl in |- *; auto. +intros e; case (Rec s2'); intros H1 H2; split; intros H3; auto. +rewrite e; rewrite H1; auto. +apply H2; injection H3; auto. +intros n; split; intros; try discriminate. +case n; injection H; auto. +Qed. + +(** Test if, starting at position [n], [s1] occurs in [s2]; if + so it returns [Some] of the position found, otherwise [None] (the precise specification is given by [index_correct1] to [index_correct4] below) *) + +Fixpoint index (n : nat) (s1 s2 : string) {struct s2} : option nat := + match s2, n with + | EmptyString, 0 => + match s1 with + | EmptyString => Some 0 + | String a s1' => None + end + | EmptyString, S n' => None + | String b s2', 0 => + if prefix s1 s2 then Some 0 + else + match index 0 s1 s2' with + | Some n => Some (S n) + | None => None + end + | String b s2', S n' => + match index n' s1 s2' with + | Some n => Some (S n) + | None => None + end + end. + +(* Dirty trick to locally prevent [prefix] from being unfolded during the proofs below; it is made [Transparent] again after [index_correct3] *) +Opaque prefix. + +(** If the result of [index] is [Some m], [s1] occurs in [s2] at position [m] *) + +Theorem index_correct1 : + forall (n m : nat) (s1 s2 : string), + index n s1 s2 = Some m -> substring m (length s1) s2 = s1. +Proof. +intros n m s1 s2; generalize n m s1; clear n m s1; elim s2; simpl in |- *; + auto. +intros n; case n; simpl in |- *; auto. +intros m s1; case s1; simpl in |- *; auto. +intros H; injection H; intros H1; rewrite <- H1; auto. +intros; discriminate. +intros; discriminate. +intros b s2' Rec n m s1. +case n; simpl in |- *; auto. +generalize (prefix_correct s1 (String b s2')); + case (prefix s1 (String b s2')). +intros H0 H; injection H; intros H1; rewrite <- H1; auto. 
+case H0; simpl in |- *; auto. +case m; simpl in |- *; auto. +case (index 0 s1 s2'); intros; discriminate. +intros m'; generalize (Rec 0 m' s1); case (index 0 s1 s2'); auto. +intros x H H0 H1; apply H; injection H1; intros H2; injection H2; auto. +intros; discriminate. +intros n'; case m; simpl in |- *; auto. +case (index n' s1 s2'); intros; discriminate. +intros m'; generalize (Rec n' m' s1); case (index n' s1 s2'); auto. +intros x H H1; apply H; injection H1; intros H2; injection H2; auto. +intros; discriminate. +Qed. + +(** If the result of [index] is [Some m], + [s1] does not occur in [s2] at any position [p] with [n <= p] and [p < m] *) + +Theorem index_correct2 : + forall (n m : nat) (s1 s2 : string), + index n s1 s2 = Some m -> + forall p : nat, n <= p -> p < m -> substring p (length s1) s2 <> s1. +Proof. +intros n m s1 s2; generalize n m s1; clear n m s1; elim s2; simpl in |- *; + auto. +intros n; case n; simpl in |- *; auto. +intros m s1; case s1; simpl in |- *; auto. +intros H; injection H; intros H1; rewrite <- H1. +intros p H0 H2; inversion H2. +intros; discriminate. +intros; discriminate. +intros b s2' Rec n m s1. +case n; simpl in |- *; auto. +generalize (prefix_correct s1 (String b s2')); + case (prefix s1 (String b s2')). +intros H0 H; injection H; intros H1; rewrite <- H1; auto. +intros p H2 H3; inversion H3. +case m; simpl in |- *; auto. +case (index 0 s1 s2'); intros; discriminate. +intros m'; generalize (Rec 0 m' s1); case (index 0 s1 s2'); auto. +intros x H H0 H1 p; try case p; simpl in |- *; auto. +intros H2 H3; red in |- *; intros H4; case H0. +intros H5 H6; absurd (false = true); auto with bool. +intros n0 H2 H3; apply H; auto. +injection H1; intros H4; injection H4; auto. +apply Le.le_O_n. +apply Lt.lt_S_n; auto. +intros; discriminate. +intros n'; case m; simpl in |- *; auto. +case (index n' s1 s2'); intros; discriminate. +intros m'; generalize (Rec n' m' s1); case (index n' s1 s2'); auto. +intros x H H0 p; case p; simpl in |- *; auto. +intros H1; inversion H1; auto. 
+intros n0 H1 H2; apply H; auto. +injection H0; intros H3; injection H3; auto. +apply Le.le_S_n; auto. +apply Lt.lt_S_n; auto. +intros; discriminate. +Qed. + +(** If the result of [index] is [None], a non-empty [s1] does not occur in [s2] + at any position [m] with [n <= m] *) + +Theorem index_correct3 : + forall (n m : nat) (s1 s2 : string), + index n s1 s2 = None -> + s1 <> EmptyString -> n <= m -> substring m (length s1) s2 <> s1. +Proof. +intros n m s1 s2; generalize n m s1; clear n m s1; elim s2; simpl in |- *; + auto. +intros n; case n; simpl in |- *; auto. +intros m s1; case s1; simpl in |- *; auto. +case m; intros; red in |- *; intros; discriminate. +intros n' m; case m; auto. +intros s1; case s1; simpl in |- *; auto. +intros b s2' Rec n m s1. +case n; simpl in |- *; auto. +generalize (prefix_correct s1 (String b s2')); + case (prefix s1 (String b s2')). +intros; discriminate. +case m; simpl in |- *; auto with bool. +case s1; simpl in |- *; auto. +intros a s H H0 H1 H2; red in |- *; intros H3; case H. +intros H4 H5; absurd (false = true); auto with bool. +case s1; simpl in |- *; auto. +intros a s n0 H H0 H1 H2; + change (substring n0 (length (String a s)) s2' <> String a s) in |- *; + apply (Rec 0); auto. +generalize H0; case (index 0 (String a s) s2'); simpl in |- *; auto; intros; + discriminate. +apply Le.le_O_n. +intros n'; case m; simpl in |- *; auto. +intros H H0 H1; inversion H1. +intros n0 H H0 H1; apply (Rec n'); auto. +generalize H; case (index n' s1 s2'); simpl in |- *; auto; intros; + discriminate. +apply Le.le_S_n; auto. +Qed. + +(* Back to normal: [prefix] can be unfolded again *) +Transparent prefix. + +(** If we are searching for [EmptyString] and the answer is [None], + this means that [n] is strictly greater than the length of [s] *) + +Theorem index_correct4 : + forall (n : nat) (s : string), + index n EmptyString s = None -> length s < n. +Proof. +intros n s; generalize n; clear n; elim s; simpl in |- *; auto. +intros n; case n; simpl in |- *; auto. +intros; discriminate. +intros; apply Lt.lt_O_Sn. 
+intros a s' H n; case n; simpl in |- *; auto. +intros; discriminate. +intros n'; generalize (H n'); case (index n' EmptyString s'); simpl in |- *; + auto. +intros; discriminate. +intros H0 H1; apply Lt.lt_n_S; auto. +Qed. + +(** Same as [index] but without the option type: we return [0] when [s1] + does not occur; note that [0] is also the result when [index] returns [Some 0] *) + +Definition findex n s1 s2 := + match index n s1 s2 with + | Some n => n + | None => 0 + end. + +(** *** Concrete syntax *) + +(** + The concrete syntax for strings in scope string_scope follows the + Coq convention for strings: all ascii characters of code less than + 128 are literal, with the exception of the character `double quote' + which must be doubled. + + Strings that involve ascii characters of code >= 128 which are not + part of a valid utf8 sequence of characters are not representable + using the Coq string notation (use explicitly the String constructor + with the ascii codes of the characters). +*) + +Example HelloWorld := " ""Hello world!"" +". |