% MATLAB version 7.12.0.635 (R2011a) 64-bit (glnxa64) % This program was created Andrew Coates and Alexey Ilchenko as a part of % research done in the REU Site: Interdisciplinary Program in High Performance % Computing at the University of Maryland, Baltimore County during Summer 2011. % For more information, see our Tech. Report at www.umbc.edu/hpcf > Publications % as Technical Report HPCF-2011-13. Code is last updated August 11, 2011. % % Calculates the expected entropy of a string of length n from a set of size 4 % as specificly applied to base pair analysis. % % The function EHn(p,n) takes in a vector p and a scalar n. p is of the form % [ p(a) p(t) p(c) p(g) ]. n is the sample size. Prints out the value of the % expected entropy when done. function [sum] = EHn(p,n) cc = Counts(n); sum = 0; for i = 1:length(cc) C = cc(i,:); ps = prod(p.^C); hs = Hs(C,n); sum = sum + Multinomial(C,n)*hs*ps; end end % Computes the Multinomial Coefficient given the counts of A,T,C,G and the % sample size n function [ans] = Multinomial(C,n) [M,I] = max(C); ans = 1; for j = M+1:n ans = ans*j; end C(I) = 1; ans = ans / prod(factorial(C)); end % Computes H(s) in the entropy equation by passing in the counts of % A,T,C,G and the sample size n function [hs] = Hs(C,n) probs = C/n; hs = 0; for prob = probs if prob ~= 0 hs = hs - prob*log2(prob); end end end % Creates the Compositions Matrix, O(n^3) x 4, which contains rows of the % from [count A, count T, count C, count G] function cc = Counts(n) cc = zeros((n+3)*(n+2)*(n+1)/6,4); m = 0; for a = 0:n for b = 0:n for c = 0:n if a + b + c <= n d = n - (a+b+c); m = m+1; cc(m,:) = [a,b,c,d]; end end end end end