\documentclass{article}
\usepackage{sober}
\usepackage[utf8]{inputenc}
\title{Various ways to compare histograms}
\author{Ben Bolker}
\date{\today}
% Semantic markup for inline code (function, package and argument names).
\newcommand{\code}[1]{\texttt{#1}}
\begin{document}
\maketitle

Graphically comparing distributions, especially with small
samples, is a challenge.  Here are some approaches.

% Setup chunk: global figure defaults for all later chunks, plus the
% ggplot2 theme.  The chunk options are set from R via opts_chunk$set()
% rather than \SweaveOpts{}, which knitr no longer processes.
<<>>=
knitr::opts_chunk$set(fig.width=5,fig.height=5,out.width="0.7\\textwidth")
library(ggplot2)
theme_set(theme_bw())
@

Example data:
<<>>=
set.seed(1001)
z1 <- rnorm(100,mean=20,sd=2)
z2 <- rnorm(100,mean=25,sd=2)
@

The \code{multhist} function from the \code{plotrix} package:
<<>>=
library(plotrix)
m <- multhist(list(z1,z2),
              breaks=seq(9.5,30.5),names.arg=10:30)
box(bty="l")
@

Or overlay histograms with transparent colors:
% The second histogram is drawn with add=TRUE so that it reuses the axes
% of the first one instead of overprinting a second set of axes and labels.
<<>>=
trred <- rgb(1,0,0,0.5)  ## transparent red
trblue <- rgb(0,0,1,0.5) ## transparent blue
hist(z1,col=trred,breaks=14:35,main="",ylim=c(0,30),xlab="z")
hist(z2,col=trblue,breaks=14:35,add=TRUE)
@

Comparing density estimates instead:
<<>>=
plot(density(z1),ylim=c(0,0.25),xlim=c(10,35))
lines(density(z2),col=2)
@

Alternative code:
<<>>=
d1 <- density(z1,from=10,to=35)
d2 <- density(z2,from=10,to=35)
matplot(d1$x,cbind(d1$y,d2$y),lty=1,col=1:2,type="l")
@

Using lattice or ggplot2 packages requires that we combine
the data into a single data frame:
<<>>=
dat <- data.frame(z=c(z1,z2),grp=factor(rep(1:2,each=100)))
library(lattice)
print(histogram(~z|grp,data=dat))
@

The \code{groups} argument works for density plots, not histograms
(from \code{?histogram}: ``Note that the default panel function for
`histogram' does not support grouped displays, whereas the one for
`densityplot' does.'').
<<>>=
densityplot(~z,groups=grp,data=dat)
@

\code{ggplot} is the new hotness.
<<>>=
library(ggplot2)
## overlapping/transparent
ggplot(dat,aes(x=z,group=grp,fill=grp))+
    geom_histogram(colour="gray",binwidth=0.5,alpha=0.5,
                   position="identity")
@
(Note that lattice plots have the \code{groups} argument,
but ggplot uses the \code{group} aesthetic.
I used \verb+theme_set(theme_bw())+ to change to my preferred plot style.)

Superimpose histograms (scaled to probability density) and density lines:
% y=..density.. rescales the bar heights from counts to a density so the
% bars and the geom_density() curves share one y axis.  Recent ggplot2
% releases spell this after_stat(density); the older form is kept here
% for compatibility with the ggplot2 version this file was written for.
<<>>=
ggplot(dat,aes(x=z,group=grp,fill=grp,colour=grp))+
    stat_bin(colour="gray",binwidth=0.5,alpha=0.5,
             position="identity",aes(y=..density..))+
    geom_density(fill=NA)
@

Put the bars next to each other:
<<>>=
ggplot(dat,aes(x=z,group=grp,fill=grp))+
    geom_histogram(colour="gray",binwidth=0.5,
                   position="dodge")+theme_bw()
@

The same thing but without gray borders on the bars
(the bars seem to overlap slightly?)
<<>>=
ggplot(dat,aes(x=z,group=grp,fill=grp,colour=grp))+
    geom_histogram(binwidth=0.5,
                   position="dodge")+theme_bw()
@

Separate graphs:
<<>>=
ggplot(dat,aes(x=z,group=grp,fill=grp))+
    geom_histogram(colour="gray",binwidth=0.5)+facet_wrap(~grp)+
    theme_bw()
@

For more than two groups one should probably look into
vertical presentations of the data, i.e.\ boxplots/violin plots/bean
plots \ldots

\end{document}