Browse Source

initial commit

master
boB Rudis 7 years ago
parent
commit
8cf00da735
No known key found for this signature in database GPG Key ID: 1D7529BE14E2BBA9
  1. 3
      .Rbuildignore
  2. 24
      DESCRIPTION
  3. 19
      NAMESPACE
  4. 156
      R/aaa.r
  5. 46
      R/bmde.r
  6. 19
      R/bmdu.r
  7. 47
      R/fitz-scott.r
  8. 47
      R/goodman.r
  9. 18
      R/package.r
  10. 47
      R/qh.r
  11. 31
      R/scimple.r
  12. 18
      R/sison-glaz.r
  13. 49
      R/wald.r
  14. 46
      R/waldcc.r
  15. 47
      R/wilson.r
  16. 2
      R/zzz.r
  17. 35
      README.Rmd
  18. 98
      README.md
  19. BIN
      README_files/figure-markdown_github/unnamed-chunk-3-1.png
  20. 32
      man/scimp_bmde.Rd
  21. 32
      man/scimp_bmdu.Rd
  22. 30
      man/scimp_fs.Rd
  23. 30
      man/scimp_goodman.Rd
  24. 30
      man/scimp_qh.Rd
  25. 30
      man/scimp_sg.Rd
  26. 32
      man/scimp_wald.Rd
  27. 27
      man/scimp_waldcc.Rd
  28. 30
      man/scimp_wilson.Rd
  29. 21
      man/scimple.Rd
  30. 33
      man/scimple_ci.Rd
  31. 21
      scimple.Rproj

3
.Rbuildignore

@ -0,0 +1,3 @@
^.*\.Rproj$
^\.Rproj\.user$
^README\.Rmd$

24
DESCRIPTION

@ -1,14 +1,26 @@
Package: scimple
Title: scimple TITLE
Title: Tidy Simultaneous Confidence Intervals for Multinomial Proportions
Version: 0.1.0
Encoding: UTF-8
Authors@R: c(person("Bob", "Rudis", role = c("aut", "cre"), email = "bob@rud.is"))
Authors@R: c(
person("Bob", "Rudis", role = c("aut", "cre"), email = "bob@rud.is"),
person("M", "Subbiah", role = c("aut"), comment = c("Original package author"))
)
Maintainer: Bob Rudis <bob@rud.is>
Description: scimple DESCRIPTION
Depends: R (>= 3.2.0)
License: AGPL
Description: Methods for obtaining simultaneous confidence intervals for multinomial
proportions have been proposed by many authors and the present study includes a variety
of widely applicable procedures. Seven classical methods (Wilson, Quesenberry and
Hurst, Goodman, Wald with and without continuity correction, Fitzpatrick and Scott,
Sison and Glaz) and Bayesian Dirichlet models are included in the package. The
advantage of MCMC pack has been exploited to derive the Dirichlet posterior directly
and this also helps in handling the Dirichlet prior parameters. This package is
prepared to have equal and unequal values for the Dirichlet prior distribution that
will provide better scope for data analysis and associated sensitivity analysis.
Depends: R (>= 3.2.0), MCMCpack
License: GPL-2
URL: https://github.com/hrbrmstr/scimple
BugReports: https://github.com/hrbrmstr/scimple/issues
LazyData: true
Suggests: testthat
Imports: purrr
Imports: dplyr, tibble, stats, purrr
RoxygenNote: 6.0.1

19
NAMESPACE

@ -0,0 +1,19 @@
# Generated by roxygen2: do not edit by hand
export(scimp_bmde)
export(scimp_bmdu)
export(scimp_fs)
export(scimp_goodman)
export(scimp_qh)
export(scimp_sg)
export(scimp_wald)
export(scimp_waldcc)
export(scimp_wilson)
export(scimple_ci)
import(MCMCpack)
import(stats)
import(tibble)
importFrom(dplyr,mutate)
importFrom(dplyr,select)
importFrom(purrr,map)
importFrom(purrr,map_df)

156
R/aaa.r

@ -0,0 +1,156 @@
# Sison & Glaz simultaneous confidence intervals (internal worker behind
# scimp_sg()).
#
# x     : vector of cell counts
# alpha : significance level; probabilities are compared against 1 - alpha
#
# Returns a tibble with one row per cell: `method` ("sg"), the raw limits
# (`lower_limit`, `upper_limit`), the limits clamped to [0, 1] (`adj_ll`,
# `adj_ul`) and `volume`, the product of the clamped interval lengths.
SG <- function(x, alpha) {
  s <- sum(x) # sum of the cell counts
  k <- length(x)

  if (k == 0 || !is.finite(s) || s < 1) {
    stop("`x` must contain cell counts that sum to at least 1", call. = FALSE)
  }

  # Probability associated with the half-width `half_width` (in counts),
  # built from truncated-Poisson moments and an Edgeworth expansion.
  sgp <- function(half_width) {
    lower <- x - half_width
    upper <- x + half_width

    # Poisson probability of each cell falling inside [lower, upper]
    in_range <- ppois(upper, x) - ppois(lower - 1, x)

    # Factorial moments of the truncated Poisson
    fm1 <- x * (ppois(upper - 1, x) - ppois(lower - 2, x)) / in_range
    fm2 <- x^2 * (ppois(upper - 2, x) - ppois(lower - 3, x)) / in_range
    fm3 <- x^3 * (ppois(upper - 3, x) - ppois(lower - 4, x)) / in_range
    fm4 <- x^4 * (ppois(upper - 4, x) - ppois(lower - 5, x)) / in_range

    # Central moments of the truncated Poisson
    m1 <- fm1
    m2 <- fm2 + fm1 - (fm1 * fm1)
    m3 <- fm3 + fm2 * (3 - (3 * fm1)) + (fm1 - (3 * fm1 * fm1) + (2 * fm1^3))
    m4 <- fm4 + fm3 * (6 - (4 * fm1)) + fm2 * (7 - (12 * fm1) + (6 * fm1^2)) +
      fm1 - (4 * fm1^2) + (6 * fm1^3) - (3 * fm1^4)
    m4t <- m4 - (3 * m2^2) # fourth-moment term used for g2 below

    s1 <- sum(m1)
    s2 <- sum(m2)
    s3 <- sum(m3)
    s4 <- sum(m4t)

    # Gammas for the Edgeworth expansion
    g1 <- s3 / (s2^(3 / 2))
    g2 <- s4 / (s2^2)

    # Chebyshev-Hermite polynomial terms of the Edgeworth expansion
    z <- (s - s1) / sqrt(s2)
    z2 <- z^2
    z3 <- z^3
    z4 <- z^4
    z6 <- z^6
    poly <- 1 + g1 * (z3 - (3 * z)) / 6 + g2 * (z4 - (6 * z2) + 3) / 24 +
      (g1^2) * (z6 - (15 * z4) + (45 * z2) - 15) / 72
    f <- poly * exp(-z2 / 2) / sqrt(2 * pi)

    pcp <- prod(in_range)  # product over the k cells
    pps <- 1 / dpois(s, s) # Poisson probability of s with parameter s
    pps * pcp * f / sqrt(s2)
  }

  # Probability (rounded to 4 decimals) for every half-width 1, 2, ..., s
  half_widths <- seq_len(floor(s))
  n_widths <- length(half_widths)
  y <- vapply(half_widths, function(h) round(sgp(h), 4), numeric(1))

  # Last position where the probability steps across 1 - alpha. Only adjacent
  # pairs that exist are compared, so nothing past the end of `y` is read.
  target <- 1 - alpha
  crossing <- which(y[-n_widths] < target & target < y[-1])
  if (length(crossing) == 0) {
    stop("Could not find a half-width whose probability brackets 1 - alpha",
         call. = FALSE)
  }
  vc <- max(crossing)

  # Relative position of 1 - alpha between the two bracketing probabilities
  delta <- (target - y[vc]) / (y[vc + 1] - y[vc])

  sp <- x / s # sample proportions
  LL <- round(sp - (vc / s), 4)
  UL <- round(sp + (vc / s) + (2 * delta / s), 4)

  # Clamp to [0, 1]; as.vector() keeps these columns plain unnamed vectors
  LLA <- pmax(as.vector(LL), 0)
  ULA <- pmin(as.vector(UL), 1)

  VOL <- round(prod(ULA - LLA), 8) # product of the clamped interval lengths

  tibble(
    method = "sg",
    lower_limit = LL,
    upper_limit = UL,
    adj_ll = LLA,
    adj_ul = ULA,
    volume = VOL
  )
}
# Bayesian Multinomial Dirichlet model with unequal prior values (internal
# worker behind scimp_bmdu()).
#
# x    : vector of cell counts (a warning is raised if any count is negative)
# d    : number of divisions; the first floor(k / d) prior values are drawn
#        from U(0, 1) and the remaining ones from U(1, 2)
# seed : random seed set before any random draw
#
# Returns a tibble with one row per cell: `method` ("bmdu"), the 2.5% and
# 97.5% quantiles of 10000 Dirichlet draws (`lower_limit`, `upper_limit`),
# `volume` (product of the interval lengths) and the `mean` of the draws.
# When `d` is outside 1..length(x) a warning is raised and every numeric
# column is NA.
BMDU <- function(x, d, seed=1492) {
  set.seed(seed)
  k <- length(x)

  if (any(x < 0)) {
    warning('Arguments must be non-negative integers')
  }

  if (!(d >= 1 && d <= k)) {
    warning('Size of the division should be less than the size of the input matrix')
    unknown <- rep(NA_real_, k)
    return(tibble(
      method = "bmdu",
      lower_limit = unknown,
      upper_limit = unknown,
      volume = NA_real_,
      mean = unknown
    ))
  }

  n_first <- floor(k / d)
  prior <- c(
    runif(n_first, 0, 1),    # first part of the prior vector
    runif(k - n_first, 1, 2) # second part of the prior vector
  )

  # Dirichlet draws with parameters counts + prior values
  draws <- rdirichlet(10000, x + prior)

  # Apply `stat` to every column of the draws, rounded to 4 decimals
  per_cell <- function(stat) {
    vapply(seq_len(k), function(j) round(stat(draws[, j]), 4), numeric(1))
  }
  l <- per_cell(function(v) quantile(v, 0.025, names = FALSE))
  u <- per_cell(function(v) quantile(v, 0.975, names = FALSE))
  m <- per_cell(mean)

  tibble(
    method = "bmdu",
    lower_limit = l,
    upper_limit = u, # the 97.5% quantile, not the posterior mean
    volume = prod(u - l),
    mean = m
  )
}

46
R/bmde.r

@ -0,0 +1,46 @@
#' Bayesian Multinomial Dirichlet Model (Equal Prior)
#'
#' This method provides 95 percent simultaneous confidence interval for multinomial proportions based on Bayesian Multinomial Dirichlet model, using the same value `p` for every element of the Dirichlet prior parameter vector.
#'
#' @md
#' @param x cell counts of given contingency table corresponding to a categorical data - non negative integers
#' @param p equal value for the Dirichlet prior parameter - positive real number
#' @param seed random seed for reproducible results
#' @return `tibble` with original limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals and the mean
#' @author Dr M Subbiah
#' @references Gelman, A., Carlin, J.B., Stern, H.S., and Rubin, D.B. (2002). Bayesian Data Analysis. Chapman & Hall, London.
#' @export
#' @examples
#' y <- c(44, 55, 43, 32, 67, 78)
#' z <- 1
#' scimp_bmde(y, z)
scimp_bmde <- function(x, p, seed=1492) {
  k <- length(x)
  n_r <- 10000 # number of Dirichlet draws

  if (any(x < 0)) {
    warning('Arguments must be non-negative integers')
  }

  set.seed(seed)

  # Dirichlet draws with parameters counts + prior value
  dr <- rdirichlet(n_r, x + p)

  # Apply `stat` to every column of the draws, rounded to 4 decimals
  per_cell <- function(stat) {
    vapply(seq_len(k), function(j) round(stat(dr[, j]), 4), numeric(1))
  }
  a <- per_cell(mean)
  l <- per_cell(function(v) quantile(v, 0.025, names = FALSE))
  u <- per_cell(function(v) quantile(v, 0.975, names = FALSE))

  tibble(
    method = "bmde",
    lower_limit = l,
    upper_limit = u,
    volume = prod(u - l), # product of the interval lengths
    mean = a
  )
}

19
R/bmdu.r

@ -0,0 +1,19 @@
#' Bayesian Multinomial Dirichlet Model (Unequal Prior)
#'
#' This method provides 95 percent simultaneous confidence interval for multinomial proportions based on Bayesian Multinomial Dirichlet model. However, it provides a mechanism through which user can split the Dirichlet prior parameter vector and suitable distributions can be incorporated for each of two groups.
#'
#' @md
#' @param x cell counts of given contingency table corresponding to a categorical data - non negative integers
#' @param d number of divisions required to split the prior vector of Dirichlet distribution to assign unequal values from U(0,1) and U(1,2)
#' @param seed random seed for reproducible results
#' @return `tibble` with original limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals and the mean
#' @author Dr M Subbiah
#' @references Gelman, A., Carlin, J.B., Stern, H.S., and Rubin, D.B. (2002). Bayesian Data Analysis. Chapman & Hall, London.
#' @export
#' @examples
#' y <- c(44, 55, 43, 32, 67, 78)
#' z <- 2
#' scimp_bmdu(y, z)
scimp_bmdu <- function(x, d, seed=1492) {
  # Exported wrapper: the computation lives in the internal BMDU() helper.
  BMDU(x = x, d = d, seed = seed)
}

47
R/fitz-scott.r

@ -0,0 +1,47 @@
#' Fitzpatrick and Scott Confidence Interval
#'
#' The simultaneous confidence interval for multinomial proportions based on the method proposed in Fitzpatrick and Scott (1987)
#'
#' @md
#' @param inpmat the cell counts of given contingency tables corresponding to categorical data
#' @param alpha a number in `[0..1]` to get the upper 100(1-`alpha`) percentage point of the chi square distribution
#' @return `tibble` with original and adjusted limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals
#' @author Dr M Subbiah
#' @references Fitzpatrick, S. and Scott, A. (1987). Quick simultaneous confidence interval for multinomial proportions. Journal of American Statistical Association 82(399): 875-878.
#' @export
#' @examples
#' y <- c(44, 55, 43, 32, 67, 78)
#' z <- 0.05
#' scimp_fs(y, z)
scimp_fs <- function(inpmat, alpha) {
  total <- sum(inpmat)

  # Two-sided standard normal quantile for level alpha
  zval <- abs(qnorm(1 - (alpha / 2)))

  prop <- inpmat / total # sample proportions
  half_width <- zval / (2 * sqrt(total)) # same half-width for every cell

  fs_ll <- prop - half_width
  fs_ul <- prop + half_width

  # Clamp to [0, 1]; as.vector() keeps these columns plain unnamed vectors
  adj_ll <- pmax(as.vector(fs_ll), 0)
  adj_ul <- pmin(as.vector(fs_ul), 1)

  # Product of the clamped interval lengths
  volume <- round(prod(adj_ul - adj_ll), 8)

  tibble(
    method = "fs",
    lower_limit = fs_ll,
    upper_limit = fs_ul,
    adj_ll = adj_ll,
    adj_ul = adj_ul,
    volume = volume
  )
}

47
R/goodman.r

@ -0,0 +1,47 @@
#' Goodman Confidence Interval
#'
#' The simultaneous confidence interval for multinomial proportions based on the method proposed in Goodman (1965)
#'
#' @md
#' @param inpmat the cell counts of given contingency tables corresponding to categorical data
#' @param alpha a number in `[0..1]` to get the upper 100(1-`alpha`) percentage point of the chi square distribution
#' @return `tibble` with original and adjusted limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals
#' @author Dr M Subbiah
#' @references Goodman, L.A. (1965). On Simultaneous Confidence Intervals for Multinomial Proportions. Technometrics 7: 247-254.
#' @export
#' @examples
#' y <- c(44, 55, 43, 32, 67, 78)
#' z <- 0.05
#' scimp_goodman(y, z)
scimp_goodman <- function(inpmat, alpha) {
  n_cells <- length(inpmat)
  total <- sum(inpmat)

  # Chi-square quantile (1 df) with alpha divided by the number of cells
  chi <- qchisq(1 - (alpha / n_cells), df = 1)

  prop <- inpmat / total # sample proportions

  # Shared pieces of the lower and upper limit formulas
  root <- sqrt(chi * chi + 4 * inpmat * chi * (1 - prop))
  denom <- 2 * (chi + total)

  goodman_ul <- (chi + 2 * inpmat + root) / denom
  goodman_ll <- (chi + 2 * inpmat - root) / denom

  # Clamp to [0, 1]; as.vector() keeps these columns plain unnamed vectors
  adj_ll <- pmax(as.vector(goodman_ll), 0)
  adj_ul <- pmin(as.vector(goodman_ul), 1)

  # Product of the clamped interval lengths
  volume <- round(prod(adj_ul - adj_ll), 8)

  tibble(
    method = "goodman",
    lower_limit = goodman_ll,
    upper_limit = goodman_ul,
    adj_ll = adj_ll,
    adj_ul = adj_ul,
    volume = volume
  )
}

18
R/package.r

@ -1,7 +1,19 @@
#' Tools to ...
#' Simultaneous Confidence Intervals for Multinomial Proportions
#'
#' Methods for obtaining simultaneous confidence intervals for multinomial proportions have
#' been proposed by many authors and the present study includes a variety of widely
#' applicable procedures. Seven classical methods (Wilson, Quesenberry and Hurst, Goodman,
#' Wald with and without continuity correction, Fitzpatrick and Scott, Sison and Glaz)
#' and Bayesian Dirichlet models are included in the package. The advantage of MCMC pack
#' has been exploited to derive the Dirichlet posterior directly and this also helps in
#' handling the Dirichlet prior parameters. This package is prepared to have equal and
#' unequal values for the Dirichlet prior distribution that will provide better scope for
#' data analysis and associated sensitivity analysis.
#'
#' @name scimple
#' @docType package
#' @author Bob Rudis (bob@@rud.is)
#' @import purrr
#' @author Dr M.Subbiah [primary], Bob Rudis (bob@@rud.is) [tidy version]
#' @import tibble stats MCMCpack
#' @importFrom dplyr mutate select
#' @importFrom purrr map map_df
NULL

47
R/qh.r

@ -0,0 +1,47 @@
#' Quesenberry and Hurst Confidence Interval
#'
#' The simultaneous confidence interval for multinomial proportions based on the method proposed in Quesenberry and Hurst (1964)
#'
#' @md
#' @param inpmat the cell counts of given contingency tables corresponding to categorical data
#' @param alpha a number in `[0..1]` to get the upper 100(1-`alpha`) percentage point of the chi square distribution
#' @return `tibble` with original and adjusted limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals
#' @author Dr M Subbiah
#' @references Quesenberry, C.P. and Hurst, D.C. (1964). Large Sample Simultaneous Confidence Intervals for Multinomial Proportions. Technometrics, 6: 191-195.
#' @export
#' @examples
#' y <- c(44, 55, 43, 32, 67, 78)
#' z <- 0.05
#' scimp_qh(y, z)
scimp_qh <- function(inpmat, alpha) {
  n_cells <- length(inpmat)
  total <- sum(inpmat)

  # Chi-square quantile with (number of cells - 1) degrees of freedom
  chi <- qchisq(1 - alpha, df = n_cells - 1)

  prop <- inpmat / total # sample proportions

  # Shared pieces of the lower and upper limit formulas
  root <- sqrt(chi * chi + 4 * inpmat * chi * (1 - prop))
  denom <- 2 * (chi + total)

  qh_ul <- (chi + 2 * inpmat + root) / denom
  qh_ll <- (chi + 2 * inpmat - root) / denom

  # Clamp to [0, 1]; as.vector() keeps these columns plain unnamed vectors
  adj_ll <- pmax(as.vector(qh_ll), 0)
  adj_ul <- pmin(as.vector(qh_ul), 1)

  # Product of the clamped interval lengths
  volume <- round(prod(adj_ul - adj_ll), 8)

  tibble(
    method = "qh",
    lower_limit = qh_ll,
    upper_limit = qh_ul,
    adj_ll = adj_ll,
    adj_ul = adj_ul,
    volume = volume
  )
}

31
R/scimple.r

@ -0,0 +1,31 @@
#' Calculate multiple simultaneous confidence intervals using selected methods (excluding Bayesian methods)
#'
#' Return simultaneous confidence intervals for multinomial proportions based on selected methods.
#'
#' @md
#' @param inpmat the cell counts of given contingency tables corresponding to categorical data
#' @param alpha a number in `[0..1]` to get the upper 100(1-`alpha`) percentage point of the chi square distribution
#' @param methods character vector of one or more methods to run over the input parameters
#' @return `tibble` with original and adjusted limits of multinomial proportions together with
#' product of length of k intervals as volume of simultaneous confidence intervals.
#' The `inpmat` and `alpha` values are also returned in the `tibble`.
#' @author Bob Rudis (bob@@rud.is)
#' @export
#' @examples
#' y <- c(44,55,43,32,67,78)
#' z <- 0.05
#' scimple_ci(y, z)
#' scimple_ci(y, z, c("goodman", "waldcc", "wilson"))
scimple_ci <- function(inpmat, alpha,
                       methods=c("fs", "goodman", "qh", "sg", "wald", "waldcc", "wilson")) {

  # Normalise the requested method names and validate them against the
  # supported set (several may be requested at once).
  supported <- c("fs", "goodman", "qh", "sg", "wald", "waldcc", "wilson")
  requested <- match.arg(unique(tolower(methods)), supported, several.ok = TRUE)

  # Run one method via the ci_funcs lookup and tag its rows with the inputs
  run_method <- function(id) {
    result <- ci_funcs[[id]](inpmat, alpha)
    mutate(result, inpmat = inpmat, alpha = alpha)
  }

  # Row-bind the per-method results
  map_df(requested, run_method)
}

18
R/sison-glaz.r

@ -0,0 +1,18 @@
#' Sison & Glaz Confidence Interval
#'
#' The simultaneous confidence interval for multinomial proportions based on the method proposed in Sison and Glaz (1995).
#'
#' @md
#' @param inpmat the cell counts of given contingency tables corresponding to categorical data
#' @param alpha a number in `[0..1]` to get the upper 100(1-`alpha`) percentage point of the chi square distribution
#' @return `tibble` with original and adjusted limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals
#' @author Dr M Subbiah
#' @references Sison, P.C. and Glaz J. (1995). Simultaneous Confidence Intervals and Sample Size Determination for Multinomial Proportions. Journal of the American Statistical Association 90: 366-369.
#' @export
#' @examples
#' y <- c(44, 55, 43, 32, 67, 78)
#' z <- 0.05
#' scimp_sg(y, z)
scimp_sg <- function(inpmat, alpha) {
  # Exported wrapper: the computation lives in the internal SG() helper.
  SG(x = inpmat, alpha = alpha)
}

49
R/wald.r

@ -0,0 +1,49 @@
#' Wald Confidence Interval
#'
#' The simple Wald type interval for multinomial proportions which is symmetrical about the
#' sample proportions. In this method no continuity corrections are made to avoid zero width
#' intervals when the sample proportions are at extreme.
#'
#' @md
#' @param inpmat the cell counts of given contingency tables corresponding to categorical data
#' @param alpha a number in `[0..1]` to get the upper 100(1-`alpha`) percentage point of the chi square distribution
#' @return `tibble` with original and adjusted limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals
#' @author Dr M Subbiah
#' @references Wald, A Tests of statistical hypotheses concerning several parameters when the number of observations is large, Trans. Am. Math. Soc. 54 (1943) 426-482.
#' @export
#' @examples
#' y <- c(44, 55, 43, 32, 67, 78)
#' z <- 0.05
#' scimp_wald(y, z)
scimp_wald <- function(inpmat, alpha) {
  total <- sum(inpmat)

  # Chi-square quantile with 1 degree of freedom
  chi <- qchisq(1 - alpha, df = 1)

  prop <- inpmat / total # sample proportions

  # Half-width of the interval around each sample proportion
  half_width <- sqrt(chi * (prop) * (1 - prop) / total)

  wald_ll <- prop - half_width
  wald_ul <- prop + half_width

  # Clamp to [0, 1]; as.vector() keeps these columns plain unnamed vectors
  adj_ll <- pmax(as.vector(wald_ll), 0)
  adj_ul <- pmin(as.vector(wald_ul), 1)

  # Product of the clamped interval lengths
  volume <- round(prod(adj_ul - adj_ll), 8)

  tibble(
    method = "wald",
    lower_limit = wald_ll,
    upper_limit = wald_ul,
    adj_ll = adj_ll,
    adj_ul = adj_ul,
    volume = volume
  )
}

46
R/waldcc.r

@ -0,0 +1,46 @@
#' Wald Confidence Interval (with continuity correction)
#'
#' The simple Wald type interval with continuity corrections for multinomial proportions which is symmetrical about the sample proportions.
#'
#' @md
#' @param inpmat the cell counts of given contingency tables corresponding to categorical data
#' @param alpha a number in `[0..1]` to get the upper 100(1-`alpha`) percentage point of the chi square distribution
#' @return `tibble` with original and adjusted limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals
#' @author Dr M Subbiah
#' @export
#' @examples
#' y <- c(44, 55, 43, 32, 67, 78)
#' z <- 0.05
#' scimp_waldcc(y, z)
scimp_waldcc <- function(inpmat, alpha) {
  total <- sum(inpmat)

  # Chi-square quantile with 1 degree of freedom
  chi <- qchisq(1 - alpha, df = 1)

  prop <- inpmat / total # sample proportions

  # Wald half-width and the continuity correction added on each side
  half_width <- sqrt(chi * (prop) * (1 - prop) / total)
  correction <- 1 / (2 * total)

  waldcc_ll <- prop - half_width - correction
  waldcc_ul <- prop + half_width + correction

  # Clamp to [0, 1]; as.vector() keeps these columns plain unnamed vectors
  adj_ll <- pmax(as.vector(waldcc_ll), 0)
  adj_ul <- pmin(as.vector(waldcc_ul), 1)

  # Product of the clamped interval lengths
  volume <- round(prod(adj_ul - adj_ll), 8)

  tibble(
    method = "waldcc",
    lower_limit = waldcc_ll,
    upper_limit = waldcc_ul,
    adj_ll = adj_ll,
    adj_ul = adj_ul,
    volume = volume
  )
}

47
R/wilson.r

@ -0,0 +1,47 @@
#' Wilson Confidence Interval
#'
#' The simultaneous confidence interval for multinomial proportions based on the method proposed in Wilson (1927)
#'
#' @md
#' @param inpmat the cell counts of given contingency tables corresponding to categorical data
#' @param alpha a number in `[0..1]` to get the upper 100(1-`alpha`) percentage point of the chi square distribution
#' @return `tibble` with original and adjusted limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals
#' @author Dr M Subbiah
#' @references E.B. Wilson, Probable inference, the law of succession and statistical inference, J.Am. Stat. Assoc. 22 (1927) 209-212.
#' @export
#' @examples
#' y <- c(44, 55, 43, 32, 67, 78)
#' z <- 0.05
#' scimp_wilson(y, z)
scimp_wilson <- function(inpmat, alpha) {
  total <- sum(inpmat)

  # Chi-square quantile with 1 degree of freedom
  chi <- qchisq(1 - alpha, df = 1)

  prop <- inpmat / total # sample proportions

  # Shared pieces of the lower and upper limit formulas
  root <- sqrt(chi * chi + 4 * inpmat * chi * (1 - prop))
  denom <- 2 * (chi + total)

  wilson_ul <- (chi + 2 * inpmat + root) / denom
  wilson_ll <- (chi + 2 * inpmat - root) / denom

  # Clamp to [0, 1]; as.vector() keeps these columns plain unnamed vectors
  adj_ll <- pmax(as.vector(wilson_ll), 0)
  adj_ul <- pmin(as.vector(wilson_ul), 1)

  # Product of the clamped interval lengths
  volume <- round(prod(adj_ul - adj_ll), 8)

  tibble(
    method = "wilson",
    lower_limit = wilson_ll,
    upper_limit = wilson_ul,
    adj_ll = adj_ll,
    adj_ul = adj_ul,
    volume = volume
  )
}

2
R/zzz.r

@ -0,0 +1,2 @@
# Lookup table from method identifier to the function implementing it;
# scimple_ci() indexes this list by name.
ci_funcs <- list(
  fs = scimp_fs,
  goodman = scimp_goodman,
  qh = scimp_qh,
  sg = scimp_sg,
  wald = scimp_wald,
  waldcc = scimp_waldcc,
  wilson = scimp_wilson
)

35
README.Rmd

@ -2,11 +2,22 @@
output: rmarkdown::github_document
---
scimple : Tools to ...
scimple : Tidy Simultaneous Confidence Interval for Multinomial Proportion
This is a tidy version of the `CoinMinD` R package with some extra bits tossed in for good measure.
The following functions are implemented:
The following data sets are included:
- `scimple_ci`: Calculate multiple simultaneous confidence intervals using selected methods (excluding Bayesian methods)
- `scimp_bmde`: Bayesian Multinomial Dirichlet Model (Equal Prior)
- `scimp_bmdu`: Bayesian Multinomial Dirichlet Model (Unequal Prior)
- `scimp_fs`: Fitzpatrick and Scott Confidence Interval
- `scimp_goodman`: Goodman Confidence Interval
- `scimp_qh`: Quesenberry & Hurst Confidence Interval
- `scimp_sg`: Sison & Glaz Confidence Interval
- `scimp_wald`: Wald Confidence Interval
- `scimp_waldcc`: Wald Confidence Interval (with continuity correction)
- `scimp_wilson`: Wilson Confidence Interval
### Installation
@ -20,12 +31,30 @@ options(width=120)
### Usage
```{r message=FALSE, warning=FALSE, error=FALSE}
```{r message=FALSE, warning=FALSE, error=FALSE, fig.width=10, fig.height=5.5, fig.retina=2}
library(scimple)
library(hrbrthemes)
library(tidyverse)
# current version
packageVersion("scimple")
y <- c(44,55,43,32,67,78)
z <- 0.05
cis <- scimple_ci(y, z)
print(cis)
ggplot(cis) +
geom_segment(aes(x=lower_limit, xend=upper_limit, y=method, yend=method, color=method)) +
scale_color_ipsum(name=NULL) +
facet_wrap(~inpmat, scales="free_x") +
labs(x=NULL, y=NULL,
title="Multipe simultaneous confidence intervals",
subtitle="Note free X scale") +
theme_ipsum_rc(grid="X") +
theme(legend.position="bottom")
```
### Test Results

98
README.md

@ -0,0 +1,98 @@
scimple : Simultaneous Confidence Interval for Multinomial Proportion
This is a tidy version of the `CoinMinD` R package with some extra bits tossed in for good measure.
The following functions are implemented:
- `scimple_ci`: Calculate multiple simultaneous confidence intervals using selected methods (excluding Bayesian methods)
- `scimp_bmde`: Bayesian Multinomial Dirichlet Model (Equal Prior)
- `scimp_bmdu`: Bayesian Multinomial Dirichlet Model (Unequal Prior)
- `scimp_fs`: Fitzpatrick and Scott Confidence Interval
- `scimp_goodman`: Goodman Confidence Interval
- `scimp_qh`: Quesenberry & Hurst Confidence Interval
- `scimp_sg`: Sison & Glaz Confidence Interval
- `scimp_wald`: Wald Confidence Interval
- `scimp_waldcc`: Wald Confidence Interval (with continuity correction)
- `scimp_wilson`: Wilson Confidence Interval
### Installation
``` r
devtools::install_github("hrbrmstr/scimple")
```
``` r
options(width=120)
```
### Usage
``` r
library(scimple)
library(hrbrthemes)
library(tidyverse)
# current version
packageVersion("scimple")
```
## [1] '0.1.0'
``` r
y <- c(44,55,43,32,67,78)
z <- 0.05
cis <- scimple_ci(y, z)
print(cis)
```
## # A tibble: 42 × 8
## method lower_limit upper_limit adj_ll adj_ul volume inpmat alpha
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 fs 0.08306258 0.1927995 0.08306258 0.1927995 1.75e-06 44 0.05
## 2 fs 0.11754534 0.2272823 0.11754534 0.2272823 1.75e-06 55 0.05
## 3 fs 0.07992778 0.1896647 0.07992778 0.1896647 1.75e-06 43 0.05
## 4 fs 0.04544502 0.1551819 0.04544502 0.1551819 1.75e-06 32 0.05
## 5 fs 0.15516289 0.2648998 0.15516289 0.2648998 1.75e-06 67 0.05
## 6 fs 0.18964565 0.2993826 0.18964565 0.2993826 1.75e-06 78 0.05
## 7 goodman 0.09468368 0.1966413 0.09468368 0.1966413 1.55e-06 44 0.05
## 8 goodman 0.12376893 0.2350489 0.12376893 0.2350489 1.55e-06 55 0.05
## 9 goodman 0.09208527 0.1931040 0.09208527 0.1931040 1.55e-06 43 0.05
## 10 goodman 0.06412671 0.1535697 0.06412671 0.1535697 1.55e-06 32 0.05
## # ... with 32 more rows
``` r
ggplot(cis) +
geom_segment(aes(x=lower_limit, xend=upper_limit, y=method, yend=method, color=method)) +
scale_color_ipsum(name=NULL) +
facet_wrap(~inpmat, scales="free_x") +
labs(x=NULL, y=NULL,
title="Multipe simultaneous confidence intervals",
subtitle="Note free X scale") +
theme_ipsum_rc(grid="X") +
theme(legend.position="bottom")
```
<img src="README_files/figure-markdown_github/unnamed-chunk-3-1.png" width="960" />
### Test Results
``` r
library(scimple)
library(testthat)
date()
```
## [1] "Fri Mar 3 14:36:17 2017"
``` r
test_dir("tests/")
```
## testthat results ========================================================================================================
## OK: 0 SKIPPED: 0 FAILED: 0
##
## DONE ===================================================================================================================

BIN
README_files/figure-markdown_github/unnamed-chunk-3-1.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 115 KiB

32
man/scimp_bmde.Rd

@ -0,0 +1,32 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bmde.r
\name{scimp_bmde}
\alias{scimp_bmde}
\title{Bayesian Multinomial Dirichlet Model (Equal Prior)}
\usage{
scimp_bmde(x, p, seed = 1492)
}
\arguments{
\item{x}{cell counts of given contingency table corresponding to a categorical data - non negative integers}
\item{p}{equal value for the Dirichlet prior parameter - positive real number}
\item{seed}{random seed for reproducible results}
}
\value{
\code{tibble} with original limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals and the mean
}
\description{
This method provides 95 percent simultaneous confidence interval for multinomial proportions based on Bayesian Multinomial Dirichlet model. However, it provides a mechanism through which user can split the Dirichlet prior parameter vector and suitable distributions can be incorporated for each of two groups.
}
\examples{
y <- c(44, 55, 43, 32, 67, 78)
z <- 1
scimp_bmde(y, z)
}
\references{
Gelman, A., Carlin, J.B., Stern, H.S., and Rubin, D.B. (2002). Bayesian Data Analysis. Chapman & Hall, London.
}
\author{
Dr M Subbiah
}

32
man/scimp_bmdu.Rd

@ -0,0 +1,32 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/bmdu.r
\name{scimp_bmdu}
\alias{scimp_bmdu}
\title{Bayesian Multinomial Dirichlet Model (Unequal Prior)}
\usage{
scimp_bmdu(x, d, seed = 1492)
}
\arguments{
\item{x}{cell counts of given contingency table corresponding to a categorical data - non negative integers}
\item{d}{number of divisions required to split the prior vector of Dirichlet distribution to assign unequal values from U(0,1) and U(1,2)}
\item{seed}{random seed for reproducible results}
}
\value{
\code{tibble} with original limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals and the mean
}
\description{
This method provides 95 percent simultaneous confidence interval for multinomial proportions based on Bayesian Multinomial Dirichlet model. However, it provides a mechanism through which user can split the Dirichlet prior parameter vector and suitable distributions can be incorporated for each of two groups.
}
\examples{
y <- c(44, 55, 43, 32, 67, 78)
z <- 2
scimp_bmdu(y, z)
}
\references{
Gelman, A., Carlin, J.B., Stern, H.S., and Rubin, D.B. (2002). Bayesian Data Analysis. Chapman & Hall, London.
}
\author{
Dr M Subbiah
}

30
man/scimp_fs.Rd

@ -0,0 +1,30 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/fitz-scott.r
\name{scimp_fs}
\alias{scimp_fs}
\title{Fitzpatrick and Scott Confidence Interval}
\usage{
scimp_fs(inpmat, alpha)
}
\arguments{
\item{inpmat}{the cell counts of given contingency tables corresponding to categorical data}
\item{alpha}{a number in \code{[0..1]} to get the upper 100(1-\code{alpha}) percentage point of the chi square distribution}
}
\value{
\code{tibble} with original and adjusted limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals
}
\description{
The simultaneous confidence interval for multinomial proportions based on the method proposed in Fitzpatrick and Scott (1987)
}
\examples{
y <- c(44, 55, 43, 32, 67, 78)
z <- 0.05
scimp_fs(y, z)
}
\references{
Fitzpatrick, S. and Scott, A. (1987). Quick simultaneous confidence interval for multinomial proportions. Journal of American Statistical Association 82(399): 875-878.
}
\author{
Dr M Subbiah
}

30
man/scimp_goodman.Rd

@ -0,0 +1,30 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/goodman.r
\name{scimp_goodman}
\alias{scimp_goodman}
\title{Goodman Confidence Interval}
\usage{
scimp_goodman(inpmat, alpha)
}
\arguments{
\item{inpmat}{the cell counts of given contingency tables corresponding to categorical data}
\item{alpha}{a number in \code{[0..1]} to get the upper 100(1-\code{alpha}) percentage point of the chi square distribution}
}
\value{
\code{tibble} with original and adjusted limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals
}
\description{
The simultaneous confidence interval for multinomial proportions based on the method proposed in Goodman (1965)
}
\examples{
y <- c(44, 55, 43, 32, 67, 78)
z <- 0.05
scimp_goodman(y, z)
}
\references{
Goodman, L.A. (1965). On Simultaneous Confidence Intervals for Multinomial Proportions. Technometrics 7: 247-254.
}
\author{
Dr M Subbiah
}

30
man/scimp_qh.Rd

@ -0,0 +1,30 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/qh.r
\name{scimp_qh}
\alias{scimp_qh}
\title{Quesenberry and Hurst Confidence Interval}
\usage{
scimp_qh(inpmat, alpha)
}
\arguments{
\item{inpmat}{the cell counts of given contingency tables corresponding to categorical data}
\item{alpha}{a number in \code{[0..1]} to get the upper 100(1-\code{alpha}) percentage point of the chi square distribution}
}
\value{
\code{tibble} with original and adjusted limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals
}
\description{
The simultaneous confidence interval for multinomial proportions based on the method proposed in Quesenberry and Hurst (1964)
}
\examples{
y <- c(44, 55, 43, 32, 67, 78)
z <- 0.05
scimp_qh(y, z)
}
\references{
Quesensberry, C.P. and Hurst, D.C. (1964). Large Sample Simultaneous Confidence Intervals for Multinational Proportions. Technometrics, 6: 191-195.
}
\author{
Dr M Subbiah
}

30
man/scimp_sg.Rd

@ -0,0 +1,30 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sison-glaz.r
\name{scimp_sg}
\alias{scimp_sg}
\title{Sison & Glaz Confidence Interval}
\usage{
scimp_sg(inpmat, alpha)
}
\arguments{
\item{inpmat}{the cell counts of given contingency tables corresponding to categorical data}
\item{alpha}{a number in \code{[0..1]} to get the upper 100(1-\code{alpha}) percentage point of the chi square distribution}
}
\value{
\code{tibble} with original and adjusted limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals
}
\description{
The simultaneous confidence interval for multinomial proportions based on the method proposed in Sison and Glaz (1995).
}
\examples{
y <- c(44, 55, 43, 32, 67, 78)
z <- 0.05
scimp_sg(y, z)
}
\references{
Sison, C.P. and Glaz, J. (1995). Simultaneous Confidence Intervals and Sample Size Determination for Multinomial Proportions. Journal of the American Statistical Association 90: 366-369.
}
\author{
Dr M Subbiah
}

32
man/scimp_wald.Rd

@ -0,0 +1,32 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/wald.r
\name{scimp_wald}
\alias{scimp_wald}
\title{Wald Confidence Interval}
\usage{
scimp_wald(inpmat, alpha)
}
\arguments{
\item{inpmat}{the cell counts of given contingency tables corresponding to categorical data}
\item{alpha}{a number in \code{[0..1]} to get the upper 100(1-\code{alpha}) percentage point of the chi square distribution}
}
\value{
\code{tibble} with original and adjusted limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals
}
\description{
The simple Wald-type interval for multinomial proportions, which is symmetric about the
sample proportions. This method applies no continuity correction, so zero-width
intervals can occur when the sample proportions are at the extremes (0 or 1).
}
\examples{
y <- c(44, 55, 43, 32, 67, 78)
z <- 0.05
scimp_wald(y, z)
}
\references{
Wald, A. (1943). Tests of statistical hypotheses concerning several parameters when the number of observations is large. Trans. Am. Math. Soc. 54: 426-482.
}
\author{
Dr M Subbiah
}

27
man/scimp_waldcc.Rd

@ -0,0 +1,27 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/waldcc.r
\name{scimp_waldcc}
\alias{scimp_waldcc}
\title{Wald Confidence Interval (with continuity correction)}
\usage{
scimp_waldcc(inpmat, alpha)
}
\arguments{
\item{inpmat}{the cell counts of given contingency tables corresponding to categorical data}
\item{alpha}{a number in \code{[0..1]} to get the upper 100(1-\code{alpha}) percentage point of the chi square distribution}
}
\value{
\code{tibble} with original and adjusted limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals
}
\description{
The simple Wald-type interval with continuity correction for multinomial proportions, which is symmetric about the sample proportions.
}
\examples{
y <- c(44, 55, 43, 32, 67, 78)
z <- 0.05
scimp_waldcc(y, z)
}
\author{
Dr M Subbiah
}

30
man/scimp_wilson.Rd

@ -0,0 +1,30 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/wilson.r
\name{scimp_wilson}
\alias{scimp_wilson}
\title{Wilson Confidence Interval}
\usage{
scimp_wilson(inpmat, alpha)
}
\arguments{
\item{inpmat}{the cell counts of given contingency tables corresponding to categorical data}
\item{alpha}{a number in \code{[0..1]} to get the upper 100(1-\code{alpha}) percentage point of the chi square distribution}
}
\value{
\code{tibble} with original and adjusted limits of multinomial proportions together with product of length of k intervals as volume of simultaneous confidence intervals
}
\description{
The simultaneous confidence interval for multinomial proportions based on the method proposed in Wilson (1927)
}
\examples{
y <- c(44, 55, 43, 32, 67, 78)
z <- 0.05
scimp_wilson(y, z)
}
\references{
E.B. Wilson, Probable inference, the law of succession and statistical inference, J. Am. Stat. Assoc. 22 (1927) 209-212.
}
\author{
Dr M Subbiah
}

21
man/scimple.Rd

@ -0,0 +1,21 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/package.r
\docType{package}
\name{scimple}
\alias{scimple}
\alias{scimple-package}
\title{Simultaneous Confidence Intervals for Multinomial Proportions}
\description{
Methods for obtaining simultaneous confidence intervals for multinomial proportions have
been proposed by many authors, and this package includes a variety of widely
applicable procedures. Seven classical methods (Wilson, Quesenberry and Hurst, Goodman,
Wald with and without continuity correction, Fitzpatrick and Scott, Sison and Glaz)
and Bayesian Dirichlet models are included in the package. The MCMCpack package
is used to derive the Dirichlet posterior directly, which also helps in
handling the Dirichlet prior parameters. The package supports both equal and
unequal values for the Dirichlet prior parameters, which provides better scope for
data analysis and associated sensitivity analysis.
}
\author{
Dr M. Subbiah [primary], Bob Rudis (bob@rud.is) [tidy version]
}

33
man/scimple_ci.Rd

@ -0,0 +1,33 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/scimple.r
\name{scimple_ci}
\alias{scimple_ci}
\title{Calculate multiple simultaneous confidence intervals using selected methods (excluding Bayesian methods)}
\usage{
scimple_ci(inpmat, alpha, methods = c("fs", "goodman", "qh", "sg", "wald",
"waldcc", "wilson"))
}
\arguments{
\item{inpmat}{the cell counts of given contingency tables corresponding to categorical data}
\item{alpha}{a number in \code{[0..1]} to get the upper 100(1-\code{alpha}) percentage point of the chi square distribution}
\item{methods}{character vector of one or more methods to run over the input parameters}
}
\value{
\code{tibble} with original and adjusted limits of multinomial proportions together with
product of length of k intervals as volume of simultaneous confidence intervals.
The \code{inpmat} and \code{alpha} values are also returned in the \code{tibble}.
}
\description{
Return simultaneous confidence intervals for multinomial proportions based on selected methods.
}
\examples{
y <- c(44,55,43,32,67,78)
z <- 0.05
scimple_ci(y, z)
scimple_ci(y, z, c("goodman", "waldcc", "wilson"))
}
\author{
Bob Rudis (bob@rud.is)
}

21
scimple.Rproj

@ -0,0 +1,21 @@
Version: 1.0
RestoreWorkspace: No
SaveWorkspace: No
AlwaysSaveHistory: No
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
AutoAppendNewline: Yes
StripTrailingWhitespace: Yes
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace
Loading…
Cancel
Save