% MATLAB version 7.12.0.635 (R2011a) 64-bit (glnxa64)
% This program was created Andrew Coates and Alexey Ilchenko as a part of
% research done in the REU Site: Interdisciplinary Program in High Performance
% Computing at the University of Maryland, Baltimore County during Summer 2011.
% For more information, see our Tech. Report at www.umbc.edu/hpcf > Publications
% as Technical Report HPCF-2011-13. Code is last updated August 11, 2011.
%
% Calculates the expected entropy of a string of length n from a set of size 4
% as specificly applied to base pair analysis.
%
% The function EHn(p,n) takes in a vector p and a scalar n. p is of the form
% [ p(a) p(t) p(c) p(g) ]. n is the sample size. Prints out the value of the
% expected entropy when done.
function [sum] = EHn(p,n)
cc = Counts(n);
sum = 0;
for i = 1:length(cc)
C = cc(i,:);
ps = prod(p.^C);
hs = Hs(C,n);
sum = sum + Multinomial(C,n)*hs*ps;
end
end
% Computes the Multinomial Coefficient given the counts of A,T,C,G and the
% sample size n
function [ans] = Multinomial(C,n)
[M,I] = max(C);
ans = 1;
for j = M+1:n
ans = ans*j;
end
C(I) = 1;
ans = ans / prod(factorial(C));
end
% Computes H(s) in the entropy equation by passing in the counts of
% A,T,C,G and the sample size n
function [hs] = Hs(C,n)
probs = C/n;
hs = 0;
for prob = probs
if prob ~= 0
hs = hs - prob*log2(prob);
end
end
end
% Creates the Compositions Matrix, O(n^3) x 4, which contains rows of the
% from [count A, count T, count C, count G]
function cc = Counts(n)
cc = zeros((n+3)*(n+2)*(n+1)/6,4);
m = 0;
for a = 0:n
for b = 0:n
for c = 0:n
if a + b + c <= n
d = n - (a+b+c);
m = m+1;
cc(m,:) = [a,b,c,d];
end
end
end
end
end