Re: practical Proof Checking systems

John Harrison (jharriso@ra.abo.fi)
Mon, 18 Mar 1996 22:32:37 +0200

Yuri writes:

I am intersted in application of Proof Checking systems
in mathematical education.

1) What Proof Checking systems are currently in practical use?

As far as I'm aware, HOL is not used much in education. Sometimes students
are taught HOL, but as part of a course in mechanical verification rather
than as an aide for other subjects. However I hope this situation will
change in the future as more elegant proof styles become available. See for
example the Web page "http://www.abo.fi/~jgrundy/schoolmath/schoolmath.html".

2) How much are formal proofs longer than usual proofs?

I don't think this has been studied extensively. First, HOL is more usually
applied to verification proofs rather than textbook mathematics, so there just
aren't many comparisons to be made. Also, the ability to write arbitrary
proof procedures and to use them in various different styles makes it hard to
give a uniform measure.

3) What are the lengths (lines) of formal proofs of the following theorems:
a) The set of all prime numbers is infinite;
b) 1^2 + 2^2 + ... + n^2 = n*(n+1)*(2*n+1) / 6
(if you have such formal proofs please send them to me, or give a
reference);

Some proofs I did in a version of HOL are given below. They're a bit ugly;
with more use of automated proof procedures they could probably be made a
lot more elegant.

4) How much time is needed for a typical student to write such
formal proofs?

I don't know. Probably a long time. The proofs below took me around
6 minutes for the sum-of-squares and 57 minutes for the primes, though the
latter involved defining a lot of preliminary stuff. And I'm a fairly
experienced HOL user.

5) How much time is needed for your system to check such proofs?

About 15 seconds each.

Cheers,

John.

=========================================================================
John Harrison | email: jharriso@abo.fi
Abo Akademi University | web: http://www.abo.fi/~jharriso/
Department of Computer Science | phone: +358 (9)21 265-4049
Lemminkaisenkatu 14a | fax: +358 (9)21 265-4732
20520 Turku | home: +358 (9)21 2316132
FINLAND | time: UTC+2:00
=========================================================================

(* sum of squares *)

let SUMMATION = new_recursive_definition num_Axiom
`(SUMMATION 0 f = f 0) /\
(SUMMATION (SUC n) f = SUMMATION n f + f (SUC n))`;;

let SOS_LEMMA = prove
(`!n. 6 * SUMMATION n (\i. i EXP 2) = n * (n + 1) * (2 * n + 1)`,
INDUCT_TAC THEN
ASM_REWRITE_TAC[ARITH; SUMMATION; LEFT_ADD_DISTRIB] THEN
REWRITE_TAC[EXP_2] THEN ARITH_TAC);;

let SOS = prove
(`SUMMATION n (\i. i EXP 2) = (n * (n + 1) * (2 * n + 1)) DIV 6`,
REWRITE_TAC[GSYM SOS_LEMMA] THEN
MATCH_MP_TAC(GSYM DIV_MULT) THEN
REWRITE_TAC[ARITH]);;

(* infinity of primes *)

parse_as_infix("divides",(12,"right"));;

let divides = new_definition
`m divides n = ?x. n = m * x`;;

let prime = new_definition
`prime(p) = p > 1 /\ !n. n divides p ==> (n = 1) \/ (n = p)`;;

let FINITE_RULES,FINITE_INDUCT,FINITE_CASES =
new_inductive_definition
`FINITE EMPTY /\
!(x:A) s. FINITE s ==> FINITE (x INSERT s)`;;

let INFINITE = new_definition
`INFINITE (s:A->bool) = ~(FINITE s)`;;

let DIVIDES_REFL = prove
(`!n. n divides n`,
GEN_TAC THEN REWRITE_TAC[divides] THEN EXISTS_TAC `1` THEN
REWRITE_TAC[MULT_CLAUSES]);;

let DIVIDES_TRANS = prove
(`!m n p. m divides n /\ n divides p ==> m divides p`,
REPEAT GEN_TAC THEN REWRITE_TAC[divides] THEN
DISCH_THEN(CONJUNCTS_THEN2
(X_CHOOSE_THEN `x1:num` ASSUME_TAC)
(X_CHOOSE_THEN `x2:num` ASSUME_TAC)) THEN
EXISTS_TAC `x1 * x2` THEN
ASM_REWRITE_TAC[MULT_ASSOC]);;

let DIVIDES_0 = prove
(`!n. n divides 0`,
GEN_TAC THEN REWRITE_TAC[divides] THEN
EXISTS_TAC `0` THEN REWRITE_TAC[MULT_CLAUSES]);;

let DIVIDES_ZERO = prove
(`!n. 0 divides n = (n = 0)`,
REWRITE_TAC[divides; MULT_CLAUSES]);;

let DIVIDES_MULT = prove
(`!m n p. m divides n ==> m divides (p * n)`,
REPEAT GEN_TAC THEN REWRITE_TAC[divides] THEN
DISCH_THEN(X_CHOOSE_TAC `x:num`) THEN
EXISTS_TAC `p * x` THEN ASM_REWRITE_TAC[] THEN
CONV_TAC(AC_CONV(MULT_ASSOC,MULT_SYM)));;

let DIVIDES_LE = prove
(`!m n. m divides n ==> (n = 0) \/ m <= n`,
REPEAT GEN_TAC THEN REWRITE_TAC[divides] THEN
DISCH_THEN(X_CHOOSE_THEN `x:num` SUBST1_TAC) THEN
SPEC_TAC(`x:num`,`x:num`) THEN
INDUCT_TAC THEN REWRITE_TAC[MULT_CLAUSES; NOT_SUC; LE_ADD]);;

let DIVIDES_ADD = prove
(`!m n p. m divides n /\ m divides (n + p) ==> m divides p`,
REPEAT GEN_TAC THEN REWRITE_TAC[divides] THEN
DISCH_THEN(CONJUNCTS_THEN2
(X_CHOOSE_THEN `x1:num` (ASSUME_TAC o SYM))
(X_CHOOSE_THEN `x2:num` (ASSUME_TAC o SYM))) THEN
EXISTS_TAC `x2 - x1` THEN ASM_REWRITE_TAC[LEFT_SUB_DISTRIB; ADD_SUB2]);;

let DIVIDES_1 = prove
(`!n. n divides 1 ==> (n = 1)`,
GEN_TAC THEN REWRITE_TAC[divides] THEN
DISCH_THEN(MP_TAC o GSYM) THEN
REWRITE_TAC[MULT_EQ_1] THEN
DISCH_THEN(CHOOSE_THEN (ACCEPT_TAC o CONJUNCT1)));;

let PRIME_FACTOR = prove
(`!n. 1 < n ==> ?p. prime(p) /\ p divides n`,
MATCH_MP_TAC num_WF THEN X_GEN_TAC `n:num` THEN DISCH_TAC THEN
DISCH_TAC THEN ASM_CASES_TAC `prime(n)` THENL
[EXISTS_TAC `n:num` THEN ASM_REWRITE_TAC[DIVIDES_REFL];
UNDISCH_TAC `~(prime(n))` THEN REWRITE_TAC[prime; NOT_FORALL_THM] THEN
ASM_REWRITE_TAC[GT; NOT_FORALL_THM; NOT_IMP; DE_MORGAN_THM] THEN
DISCH_THEN(X_CHOOSE_THEN `m:num` STRIP_ASSUME_TAC) THEN
REWRITE_TAC[GSYM prime; GSYM GT] THEN
SUBGOAL_THEN `?p. prime(p) /\ p divides m` MP_TAC THENL
[FIRST_ASSUM(MATCH_MP_TAC o REWRITE_RULE
[TAUT `a ==> b ==> c = a /\ b ==> c`]) THEN
ASM_REWRITE_TAC[LT_LE] THEN REPEAT CONJ_TAC THENL
[UNDISCH_TAC `1 < n` THEN
FIRST_ASSUM(DISJ_CASES_TAC o MATCH_MP DIVIDES_LE) THEN
ASM_REWRITE_TAC[ARITH];
REWRITE_TAC[BIT1; ADD_CLAUSES; GSYM NOT_LT; LT] THEN
DISCH_TAC THEN UNDISCH_TAC `m divides n` THEN
ASM_REWRITE_TAC[DIVIDES_ZERO] THEN DISCH_TAC THEN
UNDISCH_TAC `1 < n` THEN ASM_REWRITE_TAC[ARITH];
FIRST_ASSUM(ACCEPT_TAC o GSYM)];
DISCH_THEN(X_CHOOSE_TAC `p:num`) THEN EXISTS_TAC `p:num` THEN
ASM_REWRITE_TAC[] THEN MATCH_MP_TAC DIVIDES_TRANS THEN
EXISTS_TAC `m:num` THEN ASM_REWRITE_TAC[]]]);;

let EUCLID_LEMMA1 = prove
(`!x. x IN {p | prime(p)} ==> ~(x = 0)`,
GEN_TAC THEN CONV_TAC CONTRAPOS_CONV THEN REWRITE_TAC[] THEN
DISCH_TAC THEN ASM_REWRITE_TAC[GSPEC; IN; BETA_THM] THEN
ONCE_REWRITE_TAC[CONJ_SYM] THEN REWRITE_TAC[UNWIND_THM1] THEN
REWRITE_TAC[prime; ARITH]);;

let EUCLID_LEMMA2 = prove
(`!s. FINITE(s) /\ (!x. x IN s ==> ~(x = 0))
==> ?n. ~(n = 0) /\ !x. x IN s ==> x divides n`,
REWRITE_TAC[ITAUT `a /\ b ==> c = a ==> b ==> c`] THEN
MATCH_MP_TAC FINITE_INDUCT THEN REWRITE_TAC[NOT_IN_EMPTY] THEN CONJ_TAC THENL
[EXISTS_TAC `1` THEN REWRITE_TAC[ARITH];
MAP_EVERY X_GEN_TAC [`x:num`; `s:num->bool`] THEN DISCH_TAC THEN
DISCH_THEN(MP_TAC o REWRITE_RULE[IN_INSERT]) THEN
REWRITE_TAC[FORALL_AND_THM;
TAUT `a \/ b ==> c = (a ==> c) /\ (b ==> c)`] THEN
DISCH_THEN(CONJUNCTS_THEN2 (MP_TAC o SPEC `x:num`) MP_TAC) THEN
DISCH_THEN(fun th -> ANTE_RES_THEN MP_TAC th THEN ASSUME_TAC th) THEN
DISCH_THEN(X_CHOOSE_THEN `n:num` STRIP_ASSUME_TAC) THEN REWRITE_TAC[] THEN
DISCH_TAC THEN EXISTS_TAC `x * n` THEN ASM_REWRITE_TAC[MULT_EQ_0] THEN
X_GEN_TAC `y:num` THEN REWRITE_TAC[IN_INSERT] THEN
DISCH_THEN(DISJ_CASES_THEN2 SUBST1_TAC ASSUME_TAC) THENL
[REWRITE_TAC[divides] THEN EXISTS_TAC `n:num` THEN REFL_TAC;
MATCH_MP_TAC DIVIDES_MULT THEN
FIRST_ASSUM MATCH_MP_TAC THEN ASM_REWRITE_TAC[]]]);;

let EUCLID = prove
(`INFINITE { p | prime(p) }`,
REWRITE_TAC[INFINITE] THEN
DISCH_THEN(MP_TAC o C CONJ EUCLID_LEMMA1) THEN
DISCH_THEN(X_CHOOSE_THEN `N:num` MP_TAC o MATCH_MP EUCLID_LEMMA2) THEN
REWRITE_TAC[NOT_FORALL_THM; NOT_IMP] THEN STRIP_TAC THEN
MP_TAC(SPEC `N + 1` PRIME_FACTOR) THEN
REWRITE_TAC[LT_ADDR] THEN ASM_REWRITE_TAC[GSYM NOT_LE; LE] THEN
DISCH_THEN(X_CHOOSE_THEN `p:num` STRIP_ASSUME_TAC) THEN
FIRST_ASSUM(UNDISCH_TAC o assert is_forall o concl) THEN
DISCH_THEN(MP_TAC o SPEC `p:num`) THEN
REWRITE_TAC[IN; GSPEC; BETA_THM; NOT_IMP; LEFT_AND_EXISTS_THM] THEN
EXISTS_TAC `p:num` THEN ASM_REWRITE_TAC[] THEN DISCH_TAC THEN
UNDISCH_TAC `prime(p)` THEN REWRITE_TAC[prime; NOT_FORALL_THM] THEN
SUBGOAL_THEN `p = 1` (fun th -> REWRITE_TAC[th; ARITH]) THEN
MATCH_MP_TAC DIVIDES_1 THEN MATCH_MP_TAC DIVIDES_ADD THEN
EXISTS_TAC `N:num` THEN ASM_REWRITE_TAC[]);;