:: Constructing Binary {H}uffman Tree
:: by Hiroyuki Okazaki , Yuichi Futa and Yasunari Shidama
::
:: Copyright (c) 2013-2019 Association of Mizar Users

registration
let D be non empty set ;
let x be Element of D;
coherence ;
end;

definition
correctness
coherence ;
;
end;

:: deftheorem defines IndexedREAL HUFFMAN1:def 1 :

registration
cluster IndexedREAL -> non empty ;
coherence
not IndexedREAL is empty
;
end;

definition
let D be non empty set ;
func BinFinTrees D -> DTree-set of D means :Def2: :: HUFFMAN1:def 2
for T being DecoratedTree of D holds
( ( dom T is finite & T is binary ) iff T in it );
existence
ex b1 being DTree-set of D st
for T being DecoratedTree of D holds
( ( dom T is finite & T is binary ) iff T in b1 )
proof end;
uniqueness
for b1, b2 being DTree-set of D st ( for T being DecoratedTree of D holds
( ( dom T is finite & T is binary ) iff T in b1 ) ) & ( for T being DecoratedTree of D holds
( ( dom T is finite & T is binary ) iff T in b2 ) ) holds
b1 = b2
proof end;
end;

:: deftheorem Def2 defines BinFinTrees HUFFMAN1:def 2 :
for D being non empty set
for b2 being DTree-set of D holds
( b2 = BinFinTrees D iff for T being DecoratedTree of D holds
( ( dom T is finite & T is binary ) iff T in b2 ) );

definition
let D be non empty set ;
func BoolBinFinTrees D -> non empty Subset of (bool ()) equals :: HUFFMAN1:def 3
{ x where x is Element of bool () : ( x is finite & x <> {} ) } ;
correctness
coherence
{ x where x is Element of bool () : ( x is finite & x <> {} ) } is non empty Subset of (bool ())
;
proof end;
end;

:: deftheorem defines BoolBinFinTrees HUFFMAN1:def 3 :
for D being non empty set holds BoolBinFinTrees D = { x where x is Element of bool () : ( x is finite & x <> {} ) } ;

definition
let SOURCE be non empty finite set ;
let p be Probability of Trivial-SigmaField SOURCE;
func Initial-Trees p -> non empty finite Subset of equals :: HUFFMAN1:def 4
{ T where T is Element of FinTrees IndexedREAL : ( T is finite binary DecoratedTree of IndexedREAL & ex x being Element of SOURCE st T = root-tree [(((canFS SOURCE) ") . x),(p . {x})] ) } ;
correctness
coherence
{ T where T is Element of FinTrees IndexedREAL : ( T is finite binary DecoratedTree of IndexedREAL & ex x being Element of SOURCE st T = root-tree [(((canFS SOURCE) ") . x),(p . {x})] ) } is non empty finite Subset of
;
proof end;
end;

:: deftheorem defines Initial-Trees HUFFMAN1:def 4 :
for SOURCE being non empty finite set
for p being Probability of Trivial-SigmaField SOURCE holds Initial-Trees p = { T where T is Element of FinTrees IndexedREAL : ( T is finite binary DecoratedTree of IndexedREAL & ex x being Element of SOURCE st T = root-tree [(((canFS SOURCE) ") . x),(p . {x})] ) } ;

definition
let p be DecoratedTree of IndexedREAL ;
{} in dom p by TREES_1:22;
then p . {} in rng p by FUNCT_1:3;
then A1: ex x, y being object st
( x in NAT & y in REAL & p . {} = [x,y] ) by ZFMISC_1:def 2;
func Vrootr p -> Real equals :: HUFFMAN1:def 5
(p . {}) 2 ;
correctness
coherence
(p . {}) 2 is Real
;
by A1;
func Vrootl p -> Nat equals :: HUFFMAN1:def 6
(p . {}) 1 ;
correctness
coherence
(p . {}) 1 is Nat
;
by A1;
end;

:: deftheorem defines Vrootr HUFFMAN1:def 5 :
for p being DecoratedTree of IndexedREAL holds Vrootr p = (p . {}) 2 ;

:: deftheorem defines Vrootl HUFFMAN1:def 6 :
for p being DecoratedTree of IndexedREAL holds Vrootl p = (p . {}) 1 ;

definition
let T be finite binary DecoratedTree of IndexedREAL ;
let p be Element of dom T;
func Vtree p -> Real equals :: HUFFMAN1:def 7
(T . p) 2 ;
correctness
coherence
(T . p) 2 is Real
;
proof end;
end;

:: deftheorem defines Vtree HUFFMAN1:def 7 :
for T being finite binary DecoratedTree of IndexedREAL
for p being Element of dom T holds Vtree p = (T . p) 2 ;

definition
let p, q be finite binary DecoratedTree of IndexedREAL ;
let k be Nat;
func MakeTree (p,q,k) -> finite binary DecoratedTree of IndexedREAL equals :: HUFFMAN1:def 8
[k,(() + ())] -tree (p,q);
correctness
coherence
[k,(() + ())] -tree (p,q) is finite binary DecoratedTree of IndexedREAL
;
proof end;
end;

:: deftheorem defines MakeTree HUFFMAN1:def 8 :
for p, q being finite binary DecoratedTree of IndexedREAL
for k being Nat holds MakeTree (p,q,k) = [k,(() + ())] -tree (p,q);

definition
let X be non empty finite Subset of ;
func MaxVl X -> Nat means :Def9: :: HUFFMAN1:def 9
ex L being non empty finite Subset of NAT st
( L = { () where p is Element of BinFinTrees IndexedREAL : p in X } & it = max L );
existence
ex b1 being Nat ex L being non empty finite Subset of NAT st
( L = { () where p is Element of BinFinTrees IndexedREAL : p in X } & b1 = max L )
proof end;
uniqueness
for b1, b2 being Nat st ex L being non empty finite Subset of NAT st
( L = { () where p is Element of BinFinTrees IndexedREAL : p in X } & b1 = max L ) & ex L being non empty finite Subset of NAT st
( L = { () where p is Element of BinFinTrees IndexedREAL : p in X } & b2 = max L ) holds
b1 = b2
;
end;

:: deftheorem Def9 defines MaxVl HUFFMAN1:def 9 :
for X being non empty finite Subset of
for b2 being Nat holds
( b2 = MaxVl X iff ex L being non empty finite Subset of NAT st
( L = { () where p is Element of BinFinTrees IndexedREAL : p in X } & b2 = max L ) );

theorem :: HUFFMAN1:1
for X being non empty finite Subset of
for w being finite binary DecoratedTree of IndexedREAL st X = {w} holds
MaxVl X = Vrootl w
proof end;

theorem Th2: :: HUFFMAN1:2
for X, Y, Z being non empty finite Subset of st Z = X \/ Y holds
MaxVl Z = max ((),())
proof end;

theorem :: HUFFMAN1:3
for X, Z being non empty finite Subset of
for Y being set st Z = X \ Y holds
MaxVl Z <= MaxVl X
proof end;

theorem :: HUFFMAN1:4
for X being non empty finite Subset of
for p being Element of BinFinTrees IndexedREAL st p in X holds
Vrootl p <= MaxVl X
proof end;

defpred S1[ non empty finite Subset of , finite binary DecoratedTree of IndexedREAL ] means ( $2 in$1 & ( for q being finite binary DecoratedTree of IndexedREAL st q in $1 holds Vrootr$2 <= Vrootr q ) );

definition
let X be non empty finite Subset of ;
mode MinValueTree of X -> finite binary DecoratedTree of IndexedREAL means :Def10: :: HUFFMAN1:def 10
( it in X & ( for q being finite binary DecoratedTree of IndexedREAL st q in X holds
Vrootr it <= Vrootr q ) );
existence
ex b1 being finite binary DecoratedTree of IndexedREAL st
( b1 in X & ( for q being finite binary DecoratedTree of IndexedREAL st q in X holds
Vrootr b1 <= Vrootr q ) )
proof end;
end;

:: deftheorem Def10 defines MinValueTree HUFFMAN1:def 10 :
for X being non empty finite Subset of
for b2 being finite binary DecoratedTree of IndexedREAL holds
( b2 is MinValueTree of X iff ( b2 in X & ( for q being finite binary DecoratedTree of IndexedREAL st q in X holds
Vrootr b2 <= Vrootr q ) ) );

Lm1: for X being set
for x being object st ex u, v being object st
( u <> v & u in X & v in X ) holds
X \ {x} <> {}

proof end;

Lm2: for X, Y being set
for t, s, z being object st X c= Y & t in Y & s in Y & z in Y holds
(X \ {t,s}) \/ {z} c= Y

proof end;

Lm3: for X being finite set st 2 <= card X holds
ex u, v being object st
( u <> v & u in X & v in X )

proof end;

Lm4: for X being finite set st 1 = card X holds
ex u being object st X = {u}

proof end;

Lm5: for X being finite set
for t, s, z being object st t in X & s in X & t <> s & not z in X holds
card ((X \ {t,s}) \/ {z}) = (card X) - 1

proof end;

theorem Th5: :: HUFFMAN1:5
for SOURCE being non empty finite set
for p being Probability of Trivial-SigmaField SOURCE holds card () = card SOURCE
proof end;

theorem Th6: :: HUFFMAN1:6
for X being non empty finite Subset of
for s, t being finite binary DecoratedTree of IndexedREAL holds not MakeTree (t,s,(() + 1)) in X
proof end;

definition
let X be set ;
func LeavesSet X -> Subset of equals :: HUFFMAN1:def 11
{ () where p is Element of BinFinTrees IndexedREAL : p in X } ;
correctness
coherence
{ () where p is Element of BinFinTrees IndexedREAL : p in X } is Subset of
;
proof end;
end;

:: deftheorem defines LeavesSet HUFFMAN1:def 11 :
for X being set holds LeavesSet X = { () where p is Element of BinFinTrees IndexedREAL : p in X } ;

theorem Th7: :: HUFFMAN1:7
for X being finite binary DecoratedTree of IndexedREAL holds LeavesSet {X} = {()}
proof end;

theorem Th8: :: HUFFMAN1:8
for X, Y being set holds LeavesSet (X \/ Y) = () \/ ()
proof end;

theorem Th9: :: HUFFMAN1:9
for s, t being Tree holds not {} in Leaves (tree (t,s))
proof end;

theorem Th10: :: HUFFMAN1:10
for s, t being Tree holds Leaves (tree (t,s)) = { () where p is Element of t : p in Leaves t } \/ { (<*1*> ^ p) where p is Element of s : p in Leaves s }
proof end;

theorem Th11: :: HUFFMAN1:11
for s, t being DecoratedTree
for x being object
for q being FinSequence of NAT st q in dom t holds
(x -tree (t,s)) . () = t . q
proof end;

theorem Th12: :: HUFFMAN1:12
for s, t being DecoratedTree
for x being object
for q being FinSequence of NAT st q in dom s holds
(x -tree (t,s)) . (<*1*> ^ q) = s . q
proof end;

theorem Th13: :: HUFFMAN1:13
for s, t being DecoratedTree
for x being object holds Leaves (x -tree (t,s)) = () \/ ()
proof end;

theorem Th14: :: HUFFMAN1:14
for k being Nat
for s, t being finite binary DecoratedTree of IndexedREAL holds union (LeavesSet {s,t}) = union (LeavesSet {(MakeTree (t,s,k))})
proof end;

theorem Th15: :: HUFFMAN1:15
proof end;

theorem Th16: :: HUFFMAN1:16
for x being object
for D being non empty set
for T being finite binary DecoratedTree of D st T = root-tree x holds
Leaves T = {x}
proof end;

definition
let SOURCE be non empty finite set ;
let p be Probability of Trivial-SigmaField SOURCE;
let Tseq be FinSequence of BoolBinFinTrees IndexedREAL;
let q be FinSequence of NAT ;
pred Tseq,q,p is_constructingBinHuffmanTree means :Def12: :: HUFFMAN1:def 12
( Tseq . 1 = Initial-Trees p & len Tseq = card SOURCE & ( for i being Nat st 1 <= i & i < len Tseq holds
ex X, Y being non empty finite Subset of ex s being MinValueTree of X ex t being MinValueTree of Y ex v being finite binary DecoratedTree of IndexedREAL st
( Tseq . i = X & Y = X \ {s} & v in {(MakeTree (t,s,(() + 1))),(MakeTree (s,t,(() + 1)))} & Tseq . (i + 1) = (X \ {t,s}) \/ {v} ) ) & ex T being finite binary DecoratedTree of IndexedREAL st {T} = Tseq . (len Tseq) & dom q = Seg (card SOURCE) & ( for k being Nat st k in Seg (card SOURCE) holds
( q . k = card (Tseq . k) & q . k <> 0 ) ) & ( for k being Nat st k < card SOURCE holds
q . (k + 1) = (q . 1) - k ) & ( for k being Nat st 1 <= k & k < card SOURCE holds
2 <= q . k ) );
end;

:: deftheorem Def12 defines is_constructingBinHuffmanTree HUFFMAN1:def 12 :
for SOURCE being non empty finite set
for p being Probability of Trivial-SigmaField SOURCE
for Tseq being FinSequence of BoolBinFinTrees IndexedREAL
for q being FinSequence of NAT holds
( Tseq,q,p is_constructingBinHuffmanTree iff ( Tseq . 1 = Initial-Trees p & len Tseq = card SOURCE & ( for i being Nat st 1 <= i & i < len Tseq holds
ex X, Y being non empty finite Subset of ex s being MinValueTree of X ex t being MinValueTree of Y ex v being finite binary DecoratedTree of IndexedREAL st
( Tseq . i = X & Y = X \ {s} & v in {(MakeTree (t,s,(() + 1))),(MakeTree (s,t,(() + 1)))} & Tseq . (i + 1) = (X \ {t,s}) \/ {v} ) ) & ex T being finite binary DecoratedTree of IndexedREAL st {T} = Tseq . (len Tseq) & dom q = Seg (card SOURCE) & ( for k being Nat st k in Seg (card SOURCE) holds
( q . k = card (Tseq . k) & q . k <> 0 ) ) & ( for k being Nat st k < card SOURCE holds
q . (k + 1) = (q . 1) - k ) & ( for k being Nat st 1 <= k & k < card SOURCE holds
2 <= q . k ) ) );

theorem Th17: :: HUFFMAN1:17
for SOURCE being non empty finite set
for p being Probability of Trivial-SigmaField SOURCE ex Tseq being FinSequence of BoolBinFinTrees IndexedREAL ex q being FinSequence of NAT st Tseq,q,p is_constructingBinHuffmanTree
proof end;

definition
let SOURCE be non empty finite set ;
let p be Probability of Trivial-SigmaField SOURCE;
mode BinHuffmanTree of p -> finite binary DecoratedTree of IndexedREAL means :Def13: :: HUFFMAN1:def 13
ex Tseq being FinSequence of BoolBinFinTrees IndexedREAL ex q being FinSequence of NAT st
( Tseq,q,p is_constructingBinHuffmanTree & {it} = Tseq . (len Tseq) );
existence
ex b1 being finite binary DecoratedTree of IndexedREAL ex Tseq being FinSequence of BoolBinFinTrees IndexedREAL ex q being FinSequence of NAT st
( Tseq,q,p is_constructingBinHuffmanTree & {b1} = Tseq . (len Tseq) )
proof end;
end;

:: deftheorem Def13 defines BinHuffmanTree HUFFMAN1:def 13 :
for SOURCE being non empty finite set
for p being Probability of Trivial-SigmaField SOURCE
for b3 being finite binary DecoratedTree of IndexedREAL holds
( b3 is BinHuffmanTree of p iff ex Tseq being FinSequence of BoolBinFinTrees IndexedREAL ex q being FinSequence of NAT st
( Tseq,q,p is_constructingBinHuffmanTree & {b3} = Tseq . (len Tseq) ) );

theorem Th18: :: HUFFMAN1:18
for SOURCE being non empty finite set
for p being Probability of Trivial-SigmaField SOURCE holds union () = { z where z is Element of : ex x being Element of SOURCE st z = [(((canFS SOURCE) ") . x),(p . {x})] }
proof end;

theorem Th19: :: HUFFMAN1:19
for SOURCE being non empty finite set
for p being Probability of Trivial-SigmaField SOURCE
for Tseq being FinSequence of BoolBinFinTrees IndexedREAL
for q being FinSequence of NAT st Tseq,q,p is_constructingBinHuffmanTree holds
for i being Nat st 1 <= i & i <= len Tseq holds
union (LeavesSet (Tseq . i)) = { z where z is Element of : ex x being Element of SOURCE st z = [(((canFS SOURCE) ") . x),(p . {x})] }
proof end;

theorem :: HUFFMAN1:20
for SOURCE being non empty finite set
for p being Probability of Trivial-SigmaField SOURCE
for T being BinHuffmanTree of p holds Leaves T = { z where z is Element of : ex x being Element of SOURCE st z = [(((canFS SOURCE) ") . x),(p . {x})] }
proof end;

theorem Th21: :: HUFFMAN1:21
for SOURCE being non empty finite set
for p being Probability of Trivial-SigmaField SOURCE
for Tseq being FinSequence of BoolBinFinTrees IndexedREAL
for q being FinSequence of NAT st Tseq,q,p is_constructingBinHuffmanTree holds
for i being Nat
for T being finite binary DecoratedTree of IndexedREAL
for t, s, r being Element of dom T st T in Tseq . i & t in (dom T) \ (Leaves (dom T)) & s = t ^ & r = t ^ <*1*> holds
Vtree t = () + ()
proof end;

theorem :: HUFFMAN1:22
for SOURCE being non empty finite set
for p being Probability of Trivial-SigmaField SOURCE
for T being BinHuffmanTree of p
for t, s, r being Element of dom T st t in (dom T) \ (Leaves (dom T)) & s = t ^ & r = t ^ <*1*> holds
Vtree t = () + ()
proof end;

theorem Th23: :: HUFFMAN1:23
for X being non empty finite Subset of st ( for T being finite binary DecoratedTree of IndexedREAL st T in X holds
for p being Element of dom T
for r being Element of NAT st r = (T . p) 1 holds
r <= MaxVl X ) holds
for s, t, w being finite binary DecoratedTree of IndexedREAL st s in X & t in X & w = MakeTree (t,s,(() + 1)) holds
for p being Element of dom w
for r being Element of NAT st r = (w . p) 1 holds
r <= () + 1
proof end;

theorem Th24: :: HUFFMAN1:24
for SOURCE being non empty finite set
for p being Probability of Trivial-SigmaField SOURCE
for Tseq being FinSequence of BoolBinFinTrees IndexedREAL
for q being FinSequence of NAT st Tseq,q,p is_constructingBinHuffmanTree holds
for i being Nat st 1 <= i & i < len Tseq holds
for X, Y being non empty finite Subset of st X = Tseq . i & Y = Tseq . (i + 1) holds
MaxVl Y = () + 1
proof end;

theorem :: HUFFMAN1:25
for SOURCE being non empty finite set
for p being Probability of Trivial-SigmaField SOURCE
for Tseq being FinSequence of BoolBinFinTrees IndexedREAL
for q being FinSequence of NAT st Tseq,q,p is_constructingBinHuffmanTree holds
for i being Nat
for X being non empty finite Subset of st X = Tseq . i holds
for T being finite binary DecoratedTree of IndexedREAL st T in X holds
for p being Element of dom T
for r being Element of NAT st r = (T . p) 1 holds
r <= MaxVl X
proof end;

theorem Th26: :: HUFFMAN1:26
for SOURCE being non empty finite set
for p being Probability of Trivial-SigmaField SOURCE
for Tseq being FinSequence of BoolBinFinTrees IndexedREAL
for q being FinSequence of NAT st Tseq,q,p is_constructingBinHuffmanTree holds
for i being Nat
for X being non empty finite Subset of st X = Tseq . i holds
for T being finite binary DecoratedTree of IndexedREAL st T in X holds
for p being Element of dom T
for r being Element of NAT st r = (T . p) 1 holds
r <= MaxVl X
proof end;

theorem Th27: :: HUFFMAN1:27
for SOURCE being non empty finite set
for p being Probability of Trivial-SigmaField SOURCE
for Tseq being FinSequence of BoolBinFinTrees IndexedREAL
for q being FinSequence of NAT st Tseq,q,p is_constructingBinHuffmanTree holds
for i being Nat
for s, t being finite binary DecoratedTree of IndexedREAL
for X being non empty finite Subset of st X = Tseq . i & s in X & t in X holds
for z being finite binary DecoratedTree of IndexedREAL st z in X holds
not [(() + 1),(() + ())] in rng z
proof end;

registration
let x be object ;
coherence
proof end;
end;

theorem Th28: :: HUFFMAN1:28
for X being non empty finite Subset of
for s, t, w being finite binary DecoratedTree of IndexedREAL st ( for T being finite binary DecoratedTree of IndexedREAL st T in X holds
for p being Element of dom T
for r being Element of NAT st r = (T . p) 1 holds
r <= MaxVl X ) & ( for p, q being finite binary DecoratedTree of IndexedREAL st p in X & q in X & p <> q holds
(rng p) /\ (rng q) = {} ) & s in X & t in X & s <> t & w in X \ {s,t} holds
(rng (MakeTree (t,s,(() + 1)))) /\ (rng w) = {}
proof end;

theorem Th29: :: HUFFMAN1:29
for SOURCE being non empty finite set
for p being Probability of Trivial-SigmaField SOURCE
for Tseq being FinSequence of BoolBinFinTrees IndexedREAL
for q being FinSequence of NAT st Tseq,q,p is_constructingBinHuffmanTree holds
for i being Nat
for T, S being finite binary DecoratedTree of IndexedREAL st T in Tseq . i & S in Tseq . i & T <> S holds
(rng T) /\ (rng S) = {}
proof end;

theorem Th30: :: HUFFMAN1:30
for X being non empty finite Subset of
for s, t being finite binary DecoratedTree of IndexedREAL st s is one-to-one & t is one-to-one & t in X & s in X & (rng s) /\ (rng t) = {} & ( for z being finite binary DecoratedTree of IndexedREAL st z in X holds
not [(() + 1),(() + ())] in rng z ) holds
MakeTree (t,s,(() + 1)) is one-to-one
proof end;

theorem Th31: :: HUFFMAN1:31
for SOURCE being non empty finite set
for p being Probability of Trivial-SigmaField SOURCE
for Tseq being FinSequence of BoolBinFinTrees IndexedREAL
for q being FinSequence of NAT st Tseq,q,p is_constructingBinHuffmanTree holds
for i being Nat
for T being finite binary DecoratedTree of IndexedREAL st T in Tseq . i holds
T is one-to-one
proof end;

registration
let SOURCE be non empty finite set ;
let p be Probability of Trivial-SigmaField SOURCE;
cluster -> one-to-one for BinHuffmanTree of p;
coherence
for b1 being BinHuffmanTree of p holds b1 is one-to-one
proof end;
end;