Tags:
create new tag
view all tags

Cheat sheet for Matlab users:

Matlab vs. R

Some useful R commands:

# command line execution of R scripts:

R CMD BATCH test.r

# get help (e.g.):

> help("write.table")

# write data to file:

> write.table(sqd, file="test.dat", col.names = FALSE, quote=FALSE, row.names=FALSE)

# read data from file:

> rtd <- read.table("uf100-0239-ws55-rtd.dat")

> median(rtd$V2)

> summary(rtd)

V1 V2 V3
Min. :0.0010 Min. : 95 Min. :0.0001115
1st Qu.:0.2507 1st Qu.: 3276 1st Qu.:0.0038440
Median :0.5005 Median : 8318 Median :0.0097611
Mean :0.5005 Mean :12995 Mean :0.0152500
3rd Qu.:0.7502 3rd Qu.:18308 3rd Qu.:0.0214859
Max. :1.0000 Max. :91660 Max. :0.1075688

# produce histogram of column V2:

> hist(rtd$V2)

# plot cdf:

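> library(stepfun)

> plot(ecdf(rtd$V2))

Note: since R 1.9.0, ecdf is part of the stats package (loaded by default), so library(stepfun) is no longer needed.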

# qq plot against std normal:

> qqnorm(rtd$V2); qqline(rtd$V2)

# wilcoxon rank sum test (compare rtds) = mann-whitney u-test:

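> library(ctest)

Note: since R 1.9.0, the ctest functions are part of the stats package (loaded by default), so library(ctest) is no longer needed.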

> wilcox.test(rtd$V2,rtd40$V2,paired=FALSE)

Note: Wilcoxon rank sum test with continuity correction

data: rtd$V2 and rtd40$V2

W = 440056, p-value = 3.45e-06

alternative hypothesis: true mu is not equal to 0

# -> reject null hyp (null hyp = med are equal) -> med are not equal

# kolmogorov-smirnov test:

> ks.test(rtd$V2,rtd50$V2)

Note: Two-sample Kolmogorov-Smirnov test

data: rtd$V2 and rtd50$V2

D = 0.029, p-value = 0.7944

alternative hypothesis: two.sided

Warning message:

cannot compute correct p-values with ties in: ks.test(rtd$V2, rtd50$V2)

# -> do not reject null hyp (distr are equal)

# kendall's tau test:

> corr <- read.table("flat100-corr-nov+.dat") # xxx

> cor.test(corr$V1,corr$V2, method="kendall")

Note: Kendall's rank correlation tau

data: corr$V1 and corr$V2

z.tau = 12.9965, p-value = < 2.2e-16

alternative hypothesis: true tau is not equal to 0

sample estimates:

tau

0.8816162

# -> reject null hyp (null hyp = no correlation between data) -> data are correlated

# spearman's rank order test (alt to above):

> cor.test(corr$V1,corr$V2, method="spear")

# wilcoxon matched pairs signed-rank test:

> wilcox.test(corr$V1,corr$V2, paired=TRUE)

Note: Wilcoxon signed rank test with continuity correction

data: corr$V1 and corr$V2

V = 3919, p-value = 1.657e-06

alternative hypothesis: true mu is not equal to 0

# -> reject null hyp (null hyp = no significant performance difference) -> performance differs significantly

# kolmogorov-smirnov test against exp distr:

> ks.test(rtd$V2, pexp, 1/mean(rtd$V2))

> ks.test(rtd$V2, pexp, log(2)/29.4)

Note: chisq.test is not the goodness of fit test!

# qqplot of rtd vs. simple exp approx:

> qqplot(rtd$V2,qexp(rtd$V1,1/mean(rtd$V2)))

> qqplot(rtd$V2,qexp(rtd$V1,1/mean(rtd$V2)),log="xy")

> rtd <- read.table("ihlk-restart-output-1000-7-rtd.dat")

> qqplot(rtd$V2,qexp(1:500/500,log(2)/29.4))

# combine columns into table (array):

> qq <- cbind(rtd$V2,qexp(rtd$V1,1/mean(rtd$V2)))

# write 2-dim table (array) to file:

> write (t(qq), file="qq.dat", ncolumns=2)

# count number of inst for which alg A > alg B:

> table(corr$V1 > corr$V2)

# compute correlation of vectors x,y

> cor(x,y)

# test distribution for normality:

> shapiro.test(x)

Note: Shapiro-Wilk normality test

[if p-value < alpha, the null hypothesis (data are normally distributed) is rejected]

from Holger H. Hoos

-- Main.xulin730 - 22 Apr 2009

Topic revision: r1 - 2009-04-22 - xulin730
 
This site is powered by the TWiki collaboration platform. Powered by Perl. Copyright © 2008-2024 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding TWiki? Send feedback