:: Probability on Finite and Discrete Set and Uniform Distribution
:: by Hiroyuki Okazaki
::
:: Received May 5, 2009
:: Copyright (c) 2009 Association of Mizar Users



notation
let S be non empty finite set ;
let s be FinSequence of S;
synonym whole_event s for dom S;
end;

definition
let S be non empty finite set ;
let s be non empty FinSequence of S;
:: original: whole_event
redefine func whole_event s -> non empty finite set ;
correctness
coherence
whole_event is non empty finite set
;
;
end;

theorem defwe: :: DIST_1:1
for S being non empty finite set
for s being FinSequence of S holds whole_event = s " S
proof end;

notation
let S be non empty finite set ;
let s be FinSequence of S;
let x be set ;
synonym event_pick x,s for Coim S,s;
end;

definition
let S be non empty finite set ;
let s be FinSequence of S;
let x be set ;
:: original: event_pick
redefine func event_pick x,s -> Event of (whole_event );
correctness
coherence
event_pick ,x is Event of (whole_event )
;
by RELAT_1:167;
end;

definition
let S be non empty finite set ;
let s be FinSequence of S;
let x be set ;
func frequency x,s -> Nat equals :: DIST_1:def 1
card (event_pick x,s);
correctness
coherence
card (event_pick x,s) is Nat
;
;
end;

:: deftheorem defines frequency DIST_1:def 1 :
for S being non empty finite set
for s being FinSequence of S
for x being set holds frequency x,s = card (event_pick x,s);

theorem :: DIST_1:2
for S being non empty finite set
for s being FinSequence of S
for e being set st e in whole_event holds
ex x being Element of S st e in event_pick x,s
proof end;

theorem nazo6: :: DIST_1:3
for S being non empty finite set
for s being FinSequence of S holds card (whole_event ) = len s
proof end;

definition
let S be non empty finite set ;
let s be FinSequence of S;
let x be set ;
func FDprobability x,s -> Real equals :: DIST_1:def 2
(frequency x,s) / (len s);
correctness
coherence
(frequency x,s) / (len s) is Real
;
by XREAL_0:def 1;
end;

:: deftheorem defines FDprobability DIST_1:def 2 :
for S being non empty finite set
for s being FinSequence of S
for x being set holds FDprobability x,s = (frequency x,s) / (len s);

theorem :: DIST_1:4
for S being non empty finite set
for s being FinSequence of S
for x being set holds frequency x,s = (len s) * (FDprobability x,s)
proof end;

definition
let S be non empty finite set ;
let s be FinSequence of S;
func FDprobSEQ s -> FinSequence of REAL means :defFREQDIST: :: DIST_1:def 3
( dom it = Seg (card S) & ( for n being Nat st n in dom it holds
it . n = FDprobability ((canFS S) . n),s ) );
existence
ex b1 being FinSequence of REAL st
( dom b1 = Seg (card S) & ( for n being Nat st n in dom b1 holds
b1 . n = FDprobability ((canFS S) . n),s ) )
proof end;
uniqueness
for b1, b2 being FinSequence of REAL st dom b1 = Seg (card S) & ( for n being Nat st n in dom b1 holds
b1 . n = FDprobability ((canFS S) . n),s ) & dom b2 = Seg (card S) & ( for n being Nat st n in dom b2 holds
b2 . n = FDprobability ((canFS S) . n),s ) holds
b1 = b2
proof end;
end;

:: deftheorem defFREQDIST defines FDprobSEQ DIST_1:def 3 :
for S being non empty finite set
for s being FinSequence of S
for b3 being FinSequence of REAL holds
( b3 = FDprobSEQ s iff ( dom b3 = Seg (card S) & ( for n being Nat st n in dom b3 holds
b3 . n = FDprobability ((canFS S) . n),s ) ) );

theorem :: DIST_1:5
for S being non empty finite set
for s being non empty FinSequence of S
for x being set holds FDprobability x,s = prob (event_pick x,s) by nazo6;

definition
let S be non empty finite set ;
let s, t be FinSequence of S;
pred s,t -are_prob_equivalent means :defequiv: :: DIST_1:def 4
for x being set holds FDprobability x,s = FDprobability x,t;
reflexivity
for s being FinSequence of S
for x being set holds FDprobability x,s = FDprobability x,s
;
symmetry
for s, t being FinSequence of S st ( for x being set holds FDprobability x,s = FDprobability x,t ) holds
for x being set holds FDprobability x,t = FDprobability x,s
;
end;

:: deftheorem defequiv defines -are_prob_equivalent DIST_1:def 4 :
for S being non empty finite set
for s, t being FinSequence of S holds
( s,t -are_prob_equivalent iff for x being set holds FDprobability x,s = FDprobability x,t );

theorem EQX01: :: DIST_1:6
for S being non empty finite set
for s, t, u being FinSequence of S st s,t -are_prob_equivalent & t,u -are_prob_equivalent holds
s,u -are_prob_equivalent
proof end;

definition
let S be non empty finite set ;
let s be FinSequence of S;
func Finseq-EQclass s -> non empty Subset of (S * ) equals :: DIST_1:def 5
{ t where t is FinSequence of S : s,t -are_prob_equivalent } ;
correctness
coherence
{ t where t is FinSequence of S : s,t -are_prob_equivalent } is non empty Subset of (S * )
;
proof end;
end;

:: deftheorem defines Finseq-EQclass DIST_1:def 5 :
for S being non empty finite set
for s being FinSequence of S holds Finseq-EQclass s = { t where t is FinSequence of S : s,t -are_prob_equivalent } ;

theorem EQX02: :: DIST_1:7
for S being non empty finite set
for s, t being FinSequence of S holds
( s,t -are_prob_equivalent iff t in Finseq-EQclass s )
proof end;

theorem EQX03: :: DIST_1:8
for S being non empty finite set
for s being FinSequence of S holds s in Finseq-EQclass s ;

theorem EQX04: :: DIST_1:9
for S being non empty finite set
for s, t being FinSequence of S holds
( s,t -are_prob_equivalent iff Finseq-EQclass s = Finseq-EQclass t )
proof end;

definition
let S be non empty finite set ;
defpred S1[ set ] means ex u being FinSequence of S st $1 = Finseq-EQclass u;
func distribution_family S -> non empty Subset-Family of (S * ) means :defQuot: :: DIST_1:def 6
for A being Subset of (S * ) holds
( A in it iff ex s being FinSequence of S st A = Finseq-EQclass s );
existence
ex b1 being non empty Subset-Family of (S * ) st
for A being Subset of (S * ) holds
( A in b1 iff ex s being FinSequence of S st A = Finseq-EQclass s )
proof end;
uniqueness
for b1, b2 being non empty Subset-Family of (S * ) st ( for A being Subset of (S * ) holds
( A in b1 iff ex s being FinSequence of S st A = Finseq-EQclass s ) ) & ( for A being Subset of (S * ) holds
( A in b2 iff ex s being FinSequence of S st A = Finseq-EQclass s ) ) holds
b1 = b2
proof end;
end;

:: deftheorem defQuot defines distribution_family DIST_1:def 6 :
for S being non empty finite set
for b2 being non empty Subset-Family of (S * ) holds
( b2 = distribution_family S iff for A being Subset of (S * ) holds
( A in b2 iff ex s being FinSequence of S st A = Finseq-EQclass s ) );

theorem EQX05: :: DIST_1:10
for S being non empty finite set
for s, t being FinSequence of S holds
( s,t -are_prob_equivalent iff FDprobSEQ s = FDprobSEQ t )
proof end;

theorem :: DIST_1:11
for S being non empty finite set
for s, t being FinSequence of S st t in Finseq-EQclass s holds
FDprobSEQ s = FDprobSEQ t
proof end;

definition
let S be non empty finite set ;
func GenProbSEQ S -> Function of (distribution_family S),(REAL * ) means :defFREQDIST2: :: DIST_1:def 7
for x being Element of distribution_family S ex s being FinSequence of S st
( s in x & it . x = FDprobSEQ s );
existence
ex b1 being Function of (distribution_family S),(REAL * ) st
for x being Element of distribution_family S ex s being FinSequence of S st
( s in x & b1 . x = FDprobSEQ s )
proof end;
uniqueness
for b1, b2 being Function of (distribution_family S),(REAL * ) st ( for x being Element of distribution_family S ex s being FinSequence of S st
( s in x & b1 . x = FDprobSEQ s ) ) & ( for x being Element of distribution_family S ex s being FinSequence of S st
( s in x & b2 . x = FDprobSEQ s ) ) holds
b1 = b2
proof end;
end;

:: deftheorem defFREQDIST2 defines GenProbSEQ DIST_1:def 7 :
for S being non empty finite set
for b2 being Function of (distribution_family S),(REAL * ) holds
( b2 = GenProbSEQ S iff for x being Element of distribution_family S ex s being FinSequence of S st
( s in x & b2 . x = FDprobSEQ s ) );

theorem EQX07: :: DIST_1:12
for S being non empty finite set
for s being FinSequence of S holds (GenProbSEQ S) . (Finseq-EQclass s) = FDprobSEQ s
proof end;

EQX08: for S being non empty finite set holds GenProbSEQ S is one-to-one
proof end;

registration
let S be non empty finite set ;
cluster GenProbSEQ S -> one-to-one ;
coherence
GenProbSEQ S is one-to-one
by EQX08;
end;

definition
let S be non empty finite set ;
let p be ProbFinS FinSequence of REAL ;
assume AS: ( len p = card S & ex s being FinSequence of S st FDprobSEQ s = p ) ;
func distribution p,S -> Element of distribution_family S means :defBUNBPU: :: DIST_1:def 8
(GenProbSEQ S) . it = p;
existence
ex b1 being Element of distribution_family S st (GenProbSEQ S) . b1 = p
proof end;
uniqueness
for b1, b2 being Element of distribution_family S st (GenProbSEQ S) . b1 = p & (GenProbSEQ S) . b2 = p holds
b1 = b2
proof end;
end;

:: deftheorem defBUNBPU defines distribution DIST_1:def 8 :
for S being non empty finite set
for p being ProbFinS FinSequence of REAL st len p = card S & ex s being FinSequence of S st FDprobSEQ s = p holds
for b3 being Element of distribution_family S holds
( b3 = distribution p,S iff (GenProbSEQ S) . b3 = p );

definition
let S be non empty finite set ;
let s be FinSequence of S;
func freqSEQ s -> FinSequence of NAT means :defDIST: :: DIST_1:def 9
( dom it = Seg (card S) & ( for n being Nat st n in dom it holds
it . n = (len s) * ((FDprobSEQ s) . n) ) );
existence
ex b1 being FinSequence of NAT st
( dom b1 = Seg (card S) & ( for n being Nat st n in dom b1 holds
b1 . n = (len s) * ((FDprobSEQ s) . n) ) )
proof end;
uniqueness
for b1, b2 being FinSequence of NAT st dom b1 = Seg (card S) & ( for n being Nat st n in dom b1 holds
b1 . n = (len s) * ((FDprobSEQ s) . n) ) & dom b2 = Seg (card S) & ( for n being Nat st n in dom b2 holds
b2 . n = (len s) * ((FDprobSEQ s) . n) ) holds
b1 = b2
proof end;
end;

:: deftheorem defDIST defines freqSEQ DIST_1:def 9 :
for S being non empty finite set
for s being FinSequence of S
for b3 being FinSequence of NAT holds
( b3 = freqSEQ s iff ( dom b3 = Seg (card S) & ( for n being Nat st n in dom b3 holds
b3 . n = (len s) * ((FDprobSEQ s) . n) ) ) );

theorem nazonazo: :: DIST_1:13
for S being non empty finite set
for s being non empty FinSequence of S
for n being Nat st n in Seg (card S) holds
ex x being Element of S st
( (freqSEQ s) . n = frequency x,s & x = (canFS S) . n )
proof end;

theorem nnnazo1: :: DIST_1:14
for S being non empty finite set
for s being FinSequence of S holds freqSEQ s = (len s) * (FDprobSEQ s)
proof end;

theorem nnazo2: :: DIST_1:15
for S being non empty finite set
for s being FinSequence of S holds Sum (freqSEQ s) = (len s) * (Sum (FDprobSEQ s))
proof end;

theorem :: DIST_1:16
for S being non empty finite set
for s being non empty FinSequence of S
for n being Nat st n in dom s holds
ex m being Nat st
( (freqSEQ s) . m = frequency (s . n),s & s . n = (canFS S) . m )
proof end;

theorem LMCARD1: :: DIST_1:17
for n being Nat
for S being Function
for L being FinSequence of NAT st S is disjoint_valued & dom S = dom L & n = len L & ( for i being Nat st i in dom S holds
( S . i is finite & L . i = card (S . i) ) ) holds
( union (rng S) is finite & card (union (rng S)) = Sum L )
proof end;

theorem LMCARD: :: DIST_1:18
for S being Function
for L being FinSequence of NAT st S is disjoint_valued & dom S = dom L & ( for i being Nat st i in dom S holds
( S . i is finite & L . i = card (S . i) ) ) holds
( union (rng S) is finite & card (union (rng S)) = Sum L )
proof end;

theorem DISTSUM: :: DIST_1:19
for S being non empty finite set
for s being non empty FinSequence of S holds Sum (freqSEQ s) = len s
proof end;

theorem FREQDISTSUM: :: DIST_1:20
for S being non empty finite set
for s being non empty FinSequence of S holds Sum (FDprobSEQ s) = 1
proof end;

theorem FREQDISTSUM2: :: DIST_1:21
for S being non empty finite set
for s being non empty FinSequence of S holds FDprobSEQ s is ProbFinS
proof end;

definition
let S be non empty finite set ;
mode distProbFinS of S -> ProbFinS FinSequence of REAL means :defdistProbFinS: :: DIST_1:def 10
( len it = card S & ex s being FinSequence of S st FDprobSEQ s = it );
existence
ex b1 being ProbFinS FinSequence of REAL st
( len b1 = card S & ex s being FinSequence of S st FDprobSEQ s = b1 )
proof end;
end;

:: deftheorem defdistProbFinS defines distProbFinS DIST_1:def 10 :
for S being non empty finite set
for b2 being ProbFinS FinSequence of REAL holds
( b2 is distProbFinS of S iff ( len b2 = card S & ex s being FinSequence of S st FDprobSEQ s = b2 ) );

theorem pbf: :: DIST_1:22
for S being non empty finite set
for p being distProbFinS of S holds
( p is ProbFinS FinSequence of REAL & len p = card S & ex s being FinSequence of S st FDprobSEQ s = p & distribution p,S is Element of distribution_family S & (GenProbSEQ S) . (distribution p,S) = p )
proof end;


definition
let S be non empty finite set ;
let s be FinSequence of S;
pred s is_uniformly_distributed means :defunidistseq: :: DIST_1:def 11
for n being Nat st n in dom (FDprobSEQ s) holds
(FDprobSEQ s) . n = 1 / (card S);
end;

:: deftheorem defunidistseq defines is_uniformly_distributed DIST_1:def 11 :
for S being non empty finite set
for s being FinSequence of S holds
( s is_uniformly_distributed iff for n being Nat st n in dom (FDprobSEQ s) holds
(FDprobSEQ s) . n = 1 / (card S) );

theorem THUDEQ0: :: DIST_1:23
for S being non empty finite set
for s being FinSequence of S st s is_uniformly_distributed holds
FDprobSEQ s is constant
proof end;

theorem THUDEQ: :: DIST_1:24
for S being non empty finite set
for s, t being FinSequence of S st s is_uniformly_distributed & s,t -are_prob_equivalent holds
t is_uniformly_distributed
proof end;

theorem THUDEQ2: :: DIST_1:25
for S being non empty finite set
for s, t being FinSequence of S st s is_uniformly_distributed & t is_uniformly_distributed holds
s,t -are_prob_equivalent
proof end;

theorem THUDS: :: DIST_1:26
for S being non empty finite set holds canFS S is_uniformly_distributed
proof end;

THCFS: for S being non empty finite set
for s being FinSequence of S st s in Finseq-EQclass (canFS S) holds
s is_uniformly_distributed
proof end;

THCFS2: for S being non empty finite set
for s being FinSequence of S st s is_uniformly_distributed holds
for t being FinSequence of S st t is_uniformly_distributed holds
t in Finseq-EQclass s
proof end;

definition
let S be non empty finite set ;
func uniform_distribution S -> Element of distribution_family S means :defunidist: :: DIST_1:def 12
for s being FinSequence of S holds
( s in it iff s is_uniformly_distributed );
existence
ex b1 being Element of distribution_family S st
for s being FinSequence of S holds
( s in b1 iff s is_uniformly_distributed )
proof end;
uniqueness
for b1, b2 being Element of distribution_family S st ( for s being FinSequence of S holds
( s in b1 iff s is_uniformly_distributed ) ) & ( for s being FinSequence of S holds
( s in b2 iff s is_uniformly_distributed ) ) holds
b1 = b2
proof end;
end;

:: deftheorem defunidist defines uniform_distribution DIST_1:def 12 :
for S being non empty finite set
for b2 being Element of distribution_family S holds
( b2 = uniform_distribution S iff for s being FinSequence of S holds
( s in b2 iff s is_uniformly_distributed ) );

registration
let S be non empty finite set ;
cluster constant ProbFinS distProbFinS of S;
existence
ex b1 being distProbFinS of S st b1 is constant
proof end;
end;

definition
let S be non empty finite set ;
func Uniform_FDprobSEQ S -> constant distProbFinS of S equals :: DIST_1:def 13
FDprobSEQ (canFS S);
coherence
FDprobSEQ (canFS S) is constant distProbFinS of S
proof end;
end;

:: deftheorem defines Uniform_FDprobSEQ DIST_1:def 13 :
for S being non empty finite set holds Uniform_FDprobSEQ S = FDprobSEQ (canFS S);

theorem :: DIST_1:27
for S being non empty finite set holds uniform_distribution S = distribution (Uniform_FDprobSEQ S),S
proof end;