Title: | Format Output of Various Routines in a Suitable Way for Reports and Publication |
---|---|
Description: | A bunch of convenience functions that transform the results of some basic statistical analyses into table format nearly ready for publication. This includes descriptive tables, tables of logistic regression and Cox regression results as well as forest plots. |
Authors: | Thomas A. Gerds [aut, cre], Christian Torp-Pedersen [ctb], Klaus K Holst [ctb], Brice Ozenne [aut] |
Maintainer: | Thomas A. Gerds <[email protected]> |
License: | GPL (>= 2) |
Version: | 2023.01.17 |
Built: | 2024-11-04 04:20:06 UTC |
Source: | https://github.com/tagteam/publish |
This package processes results of descriptive statistcs and regression analysis into final tables and figures of a manuscript
cut
A version of cut
that easily formats the labels and places breaks by default.
acut( x, n = 5, type = "default", format = NULL, format.low = NULL, format.high = NULL, dig.lab = 3, right = TRUE, breaks, labels = TRUE, ... )
acut( x, n = 5, type = "default", format = NULL, format.low = NULL, format.high = NULL, dig.lab = 3, right = TRUE, breaks, labels = TRUE, ... )
x |
a numeric vector which is to be converted to a factor by cutting (passed directly to |
n |
number of bins to create based on the empirical quantiles of x. This will be overruled if |
type |
a high-level formatting option. For now, the only other option than the default setting is " |
format |
string used to make labels. %l and %u identifies the lower and upper value of the breaks respectively. See examples. |
format.low |
string used specifically on the lowest label. |
format.high |
string used specifically on the highest label. |
dig.lab |
integer which is used when labels are not given. It determines the number of digits used in formatting the break numbers. (Passed directly to |
right |
logical, indicating if the intervals should be closed on the right (and open on the left) or vice versa (passed directly to |
breaks |
specify breaks manually as in |
labels |
logical, indicating whether or not to make labels or simply use ordered numbers. If TRUE, the labels are constructed as discribed above. |
... |
further arguments passed to |
The formats are supplied by specifiyng the text around the lower (%l) and upper (%l) value (see examples).
If user specified breaks are supplied, the default labels from cut
are used.
If automatic breaks are used, the default labels are a slight modification at the end point of the default from cut
All this can of course be adjusted manually through the format functionality (see below).
By default, 5 breaks are constructed according to the quantiles with of the input x
.
The number of breaks can be adjusted, and default specifying breaks (as in cut
) can be supplied instead.
If type
is changed from "default
" to another option, a different formatting template is used.
For now the only other option is "age
", which is designed to be well suited to easily group age variables.
When type
="age
" only the breaks
argument is used, and it behaves different from otherwise.
If a single number is supplied, intervals of length breaks
will automatically be constructed (starting from 0).
If a vector is supplied, the intervals are used as in cut
but formatted differently, see examples.
same as for cut. A vector of 'factors' is created, unless 'labels=FALSE'.
Anders Munch
data(Diabetes) # load dataset ## The default uses format similar to cut chol.groups <- acut(Diabetes$chol) table(chol.groups) ## The formatting can easily be changed chol.groups <- acut(Diabetes$chol,format="%l-%u",n=5) table(chol.groups) ## The default is to automatic place the breaks, so the number of this can easily be changed. chol.groups <- acut(Diabetes$chol,n=7) table(chol.groups) ## Manually setting format and breaks age.groups <- acut(Diabetes$age,format="%l-%u",breaks=seq(0,100,by=10)) table(age.groups) ## Other variations age.groups <- acut(Diabetes$age, format="%l-%u", format.low="below %u", format.high="above %l", breaks=c(0, seq(20,80,by=10), Inf)) table(age.groups) BMI.groups <- acut(Diabetes$BMI, format="BMI between %l and %u", format.low="BMI below %u", format.high="BMI above %l") table(BMI.groups) org(as.data.frame(table(BMI=BMI.groups))) ## Instead of using the quantiles, we can specify equally spaced breaks, ## but still get the same formatting BMI.grouping <- seq(min(Diabetes$BMI,na.rm=TRUE), max(Diabetes$BMI,na.rm=TRUE), length.out=6) BMI.grouping[1] <- -Inf # To get all included BMI.groups <- acut(Diabetes$BMI, breaks=BMI.grouping, format="BMI between %l and %u", format.low="BMI below %u", format.high="BMI above %l") table(BMI.groups) org(as.data.frame(table(BMI=BMI.groups))) ## Using type="age" ## When using type="age", categories of 10 years are constructed by default. ## The are formatted to be easier to read when the values are ages. table(acut(Diabetes$age, type="age")) ## This can be changes with the breaks argument. ## Note that this is diffent from cut when breaks is a single number. table(acut(Diabetes$age, type="age", breaks=20)) ## Of course We can also supply the breaks manually. ## The formatting depends on whether or not all the values fall within the breaks: ## All values within the breaks table(acut(Diabetes$age, type="age", breaks=c(0, 30, 50, 80, 100))) ## Some values below and above the breaks table(acut(Diabetes$age, type="age", breaks=c(30, 50, 80)))
data(Diabetes) # load dataset ## The default uses format similar to cut chol.groups <- acut(Diabetes$chol) table(chol.groups) ## The formatting can easily be changed chol.groups <- acut(Diabetes$chol,format="%l-%u",n=5) table(chol.groups) ## The default is to automatic place the breaks, so the number of this can easily be changed. chol.groups <- acut(Diabetes$chol,n=7) table(chol.groups) ## Manually setting format and breaks age.groups <- acut(Diabetes$age,format="%l-%u",breaks=seq(0,100,by=10)) table(age.groups) ## Other variations age.groups <- acut(Diabetes$age, format="%l-%u", format.low="below %u", format.high="above %l", breaks=c(0, seq(20,80,by=10), Inf)) table(age.groups) BMI.groups <- acut(Diabetes$BMI, format="BMI between %l and %u", format.low="BMI below %u", format.high="BMI above %l") table(BMI.groups) org(as.data.frame(table(BMI=BMI.groups))) ## Instead of using the quantiles, we can specify equally spaced breaks, ## but still get the same formatting BMI.grouping <- seq(min(Diabetes$BMI,na.rm=TRUE), max(Diabetes$BMI,na.rm=TRUE), length.out=6) BMI.grouping[1] <- -Inf # To get all included BMI.groups <- acut(Diabetes$BMI, breaks=BMI.grouping, format="BMI between %l and %u", format.low="BMI below %u", format.high="BMI above %l") table(BMI.groups) org(as.data.frame(table(BMI=BMI.groups))) ## Using type="age" ## When using type="age", categories of 10 years are constructed by default. ## The are formatted to be easier to read when the values are ages. table(acut(Diabetes$age, type="age")) ## This can be changes with the breaks argument. ## Note that this is diffent from cut when breaks is a single number. table(acut(Diabetes$age, type="age", breaks=20)) ## Of course We can also supply the breaks manually. ## The formatting depends on whether or not all the values fall within the breaks: ## All values within the breaks table(acut(Diabetes$age, type="age", breaks=c(0, 30, 50, 80, 100))) ## Some values below and above the breaks table(acut(Diabetes$age, type="age", breaks=c(30, 50, 80)))
Compute mean values with confidence intervals
ci.mean(x, ...)
ci.mean(x, ...)
x |
object passed to methods |
... |
passed to methods |
Normal approximation
a list with mean values and confidence limits
Compute mean values with confidence intervals
## Default S3 method: ci.mean( x, alpha = 0.05, normal = TRUE, na.rm = TRUE, statistic = "arithmetic", ... )
## Default S3 method: ci.mean( x, alpha = 0.05, normal = TRUE, na.rm = TRUE, statistic = "arithmetic", ... )
x |
numeric vector |
alpha |
level of significance |
normal |
If |
na.rm |
If |
statistic |
Decide which mean to compute: either |
... |
not used |
Normal approximation
a list with mean values and confidence limits
Thomas Gerds
These data are used for testing Publish package functionality.
A data frame with 27 observations on the following 9 variables.
data(CiTable) labellist <- split(CiTable[,c("Dose","Mean","SD","n")],CiTable[,"Drug"]) labellist plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=labellist)
data(CiTable) labellist <- split(CiTable[,c("Dose","Mean","SD","n")],CiTable[,"Drug"]) labellist plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=labellist)
Run a series of Cox regression analyses for a list of predictor variables and summarize the results in a table. The Cox models can be adjusted for a fixed set of covariates
This function runs on coxph
from the survival package.
coxphSeries(formula, data, vars, ...)
coxphSeries(formula, data, vars, ...)
formula |
The fixed part of the regression formula. For
univariate analyses this is simply |
data |
A |
vars |
A list of variable names, the changing part of the regression formula. |
... |
passed to publish.coxph |
matrix with results
Thomas Alexander Gerds
library(survival) data(pbc) ## collect hazard ratios from three univariate Cox regression analyses pbc$edema <- factor(pbc$edema,levels=c("0","0.5","1"),labels=c("0","0.5","1")) uni.hr <- coxphSeries(Surv(time,status==2)~1,vars=c("edema","bili","protime"),data=pbc) uni.hr ## control the logistic regression analyses for age and gender ## but collect only information on the variables in `vars'. controlled.hr <- coxphSeries(Surv(time,status==2)~age+sex,vars=c("edema","bili","protime"),data=pbc) controlled.hr
library(survival) data(pbc) ## collect hazard ratios from three univariate Cox regression analyses pbc$edema <- factor(pbc$edema,levels=c("0","0.5","1"),labels=c("0","0.5","1")) uni.hr <- coxphSeries(Surv(time,status==2)~1,vars=c("edema","bili","protime"),data=pbc) uni.hr ## control the logistic regression analyses for age and gender ## but collect only information on the variables in `vars'. controlled.hr <- coxphSeries(Surv(time,status==2)~age+sex,vars=c("edema","bili","protime"),data=pbc) controlled.hr
These data are courtesy of Dr John Schorling, Department of Medicine, University of Virginia School of Medicine. The data consist of 19 variables on 403 subjects from 1046 subjects who were interviewed in a study to understand the prevalence of obesity, diabetes, and other cardiovascular risk factors in central Virginia for African Americans. According to Dr John Hong, Diabetes Mellitus Type II (adult onset diabetes) is associated most strongly with obesity. The waist/hip ratio may be a predictor in diabetes and heart disease. DM II is also agssociated with hypertension - they may both be part of "Syndrome X". The 403 subjects were the ones who were actually screened for diabetes. Glycosolated hemoglobin > 7.0 is usually taken as a positive diagnosis of diabetes.
A data frame with 205 observations on the following 12 variables.
subject id
Total Cholesterol
Stabilized Glucose
High Density Lipoprotein
Cholesterol/HDL Ratio
Glycosolated Hemoglobin
a factor with levels (Buckingham,Louisa)
age (years)
male or female
height (inches)
height (cm)
weight (pounds)
weight (kg)
a factor with levels (small,medium,large)
First Systolic Blood Pressure
First Diastolic Blood Pressure
Second Diastolic Blood Pressure
Second Diastolic Blood Pressure
waist in inches
hip in inches
Postprandial Time when Labs were Drawn in minutes
Categorized age
Categorized BMI
Willems JP, Saunders JT, DE Hunt, JB Schorling: Prevalence of coronary heart disease risk factors among rural blacks: A community-based study. Southern Medical Journal 90:814-820; 1997 Schorling JB, Roach J, Siegel M, Baturka N, Hunt DE, Guterbock TM, Stewart HL: A trial of church-based smoking cessation interventions for rural African Americans. Preventive Medicine 26:92-101; 1997.
data(Diabetes)
data(Diabetes)
Expand regression coefficient table
fixRegressionTable( x, varnames, reference.value, reference.style = NULL, factorlevels, scale = NULL, nmiss, intercept )
fixRegressionTable( x, varnames, reference.value, reference.style = NULL, factorlevels, scale = NULL, nmiss, intercept )
x |
object resulting from |
varnames |
Names of variables |
reference.value |
Reference value for reference categories |
reference.style |
Style for showing results for categorical
variables. If |
factorlevels |
Levels of the categorical variables. |
scale |
Scale for some or all of the variables |
nmiss |
Number of missing values |
intercept |
Intercept |
This function expands results from "regressionTable" with extralines and columns
For factor variables the reference group is shown. For continuous variables the units are shown and for transformed continuous variables also the scale. For all variables the numbers of missing values are added.
a table with regression coefficients
Thomas Alexander Gerds <[email protected]>
Summarize baseline variables in groups defined by outcome at a given followup time point
followupTable(formula, data, followup.time, compare.groups, ...)
followupTable(formula, data, followup.time, compare.groups, ...)
formula |
Formula A formula whose left hand side is a
|
data |
A data.frame in which all the variables of
|
followup.time |
Time point at which to evaluate outcome status. |
compare.groups |
Method for comparing groups. |
... |
Passed to |
If compare.groups!=FALSE
, p-values are obtained from stopped Cox regression, i.e., all events are censored at follow-up time.
A univariate Cox regression model is fitted to assess the effect of each variable on the right hand side of the formula on the event hazard and shown is the p-value of anova(fit)
, see anova.coxph
.
Summary table.
Thomas A. Gerds <[email protected]>
univariateTable
library(survival) data(pbc) pbc$edema <- factor(pbc$edema,levels=c("0","0.5","1"),labels=c("0","0.5","1")) pbc$sex <- factor(pbc$sex,levels=c("m","f"),labels=c("m","f")) followupTable(Hist(time,status)~age+edema+sex,data=pbc,followup.time=1000)
library(survival) data(pbc) pbc$edema <- factor(pbc$edema,levels=c("0","0.5","1"),labels=c("0","0.5","1")) pbc$sex <- factor(pbc$sex,levels=c("m","f"),labels=c("m","f")) followupTable(Hist(time,status)~age+edema+sex,data=pbc,followup.time=1000)
Format confidence intervals
formatCI( x, lower, upper, show.x = FALSE, handler = "sprintf", format = "[l;u]", degenerated = "asis", digits = 2, nsmall = digits, sep = "", reference.pos, reference.label = "", ... )
formatCI( x, lower, upper, show.x = FALSE, handler = "sprintf", format = "[l;u]", degenerated = "asis", digits = 2, nsmall = digits, sep = "", reference.pos, reference.label = "", ... )
x |
not used (for compatibility with format) |
lower |
Numeric vector of lower limits |
upper |
Numeric vector of upper limits |
show.x |
Logical. If |
handler |
Function to format numeric values. Default is
|
format |
Character string in which |
degenerated |
String to show when lower==upper. Default is '–' |
digits |
If handler |
nsmall |
If handler |
sep |
Field separator |
reference.pos |
Position of factor reference |
reference.label |
Label for factor reference |
... |
passed to handler |
The default format for confidence intervals is [lower; upper].
String vector with confidence intervals
Thomas A. Gerds <[email protected]>
plot.ci ci.mean
x=ci.mean(rnorm(10)) formatCI(lower=x[3],upper=x[4]) formatCI(lower=c(0.001,-2.8413),upper=c(1,3.0008884)) # change format formatCI(lower=c(0.001,-2.8413),upper=c(1,3.0008884),format="(l, u)") # show x formatCI(x=x$mean,lower=x$lower,upper=x$upper,format="(l, u)",show.x=TRUE) # change of handler function l <- c(-0.0890139,0.0084736,144.898333,0.000000001) u <- c(0.03911392,0.3784706,3338944.8821221,0.00001) cbind(format=formatCI(lower=l,upper=u,format="[l;u)",digits=2,nsmall=2,handler="format"), prettyNum=formatCI(lower=l,upper=u,format="[l;u)",digits=2,nsmall=2,handler="prettyNum"), sprintf=formatCI(lower=l,upper=u,format="[l;u)",digits=2,nsmall=2,handler="sprintf"))
x=ci.mean(rnorm(10)) formatCI(lower=x[3],upper=x[4]) formatCI(lower=c(0.001,-2.8413),upper=c(1,3.0008884)) # change format formatCI(lower=c(0.001,-2.8413),upper=c(1,3.0008884),format="(l, u)") # show x formatCI(x=x$mean,lower=x$lower,upper=x$upper,format="(l, u)",show.x=TRUE) # change of handler function l <- c(-0.0890139,0.0084736,144.898333,0.000000001) u <- c(0.03911392,0.3784706,3338944.8821221,0.00001) cbind(format=formatCI(lower=l,upper=u,format="[l;u)",digits=2,nsmall=2,handler="format"), prettyNum=formatCI(lower=l,upper=u,format="[l;u)",digits=2,nsmall=2,handler="prettyNum"), sprintf=formatCI(lower=l,upper=u,format="[l;u)",digits=2,nsmall=2,handler="sprintf"))
Run a series of generalized linear regression analyses for a list of predictor variables and summarize the results in a table. The regression models can be adjusted for a fixed set of covariates.
glmSeries(formula, data, vars, ...)
glmSeries(formula, data, vars, ...)
formula |
The fixed part of the regression formula. For
univariate analyses this is simply |
data |
A |
vars |
A list of variable names, the changing part of the regression formula. |
... |
passed to glm |
Matrix with regression coefficients, one for each element of vars
.
Thomas Alexander Gerds
data(Diabetes) Diabetes$hyper1 <- factor(1*(Diabetes$bp.1s>140)) ## collect odds ratios from three univariate logistic regression analyses uni.odds <- glmSeries(hyper1~1,vars=c("chol","hdl","location"),data=Diabetes,family=binomial) uni.odds ## control the logistic regression analyses for age and gender ## but collect only information on the variables in `vars'. controlled.odds <- glmSeries(hyper1~age+gender, vars=c("chol","hdl","location"), data=Diabetes, family=binomial) controlled.odds
data(Diabetes) Diabetes$hyper1 <- factor(1*(Diabetes$bp.1s>140)) ## collect odds ratios from three univariate logistic regression analyses uni.odds <- glmSeries(hyper1~1,vars=c("chol","hdl","location"),data=Diabetes,family=binomial) uni.odds ## control the logistic regression analyses for age and gender ## but collect only information on the variables in `vars'. controlled.odds <- glmSeries(hyper1~age+gender, vars=c("chol","hdl","location"), data=Diabetes, family=binomial) controlled.odds
Label output tables
labelUnits(x, ...)
labelUnits(x, ...)
x |
A matrix obtained with |
... |
not used |
Modify labels and values of variables in summary tables
The re-labeled matrix
Thomas A. Gerds <[email protected]>
univariateTable
data(Diabetes) tab <- summary(univariateTable(gender~AgeGroups+chol+waist,data=Diabetes)) publish(tab) ltab <- labelUnits(tab,"chol"="Cholesterol (mg/dL)","<40"="younger than 40") publish(ltab) ## pass labels immediately to utable utable(gender~AgeGroups+chol+waist,data=Diabetes, "chol"="Cholesterol (mg/dL)","<40"="younger than 40") ## sometimes useful to state explicitly which variables value ## should be re-labelled utable(gender~AgeGroups+chol+waist,data=Diabetes, "chol"="Cholesterol (mg/dL)","AgeGroups.<40"="younger than 40")
data(Diabetes) tab <- summary(univariateTable(gender~AgeGroups+chol+waist,data=Diabetes)) publish(tab) ltab <- labelUnits(tab,"chol"="Cholesterol (mg/dL)","<40"="younger than 40") publish(ltab) ## pass labels immediately to utable utable(gender~AgeGroups+chol+waist,data=Diabetes, "chol"="Cholesterol (mg/dL)","<40"="younger than 40") ## sometimes useful to state explicitly which variables value ## should be re-labelled utable(gender~AgeGroups+chol+waist,data=Diabetes, "chol"="Cholesterol (mg/dL)","AgeGroups.<40"="younger than 40")
This function eases the process of generating date variables. All variables in a data.frame which match a regular expression are included
lazyDateCoding(data, format, pattern, varnames, testlength = 10)
lazyDateCoding(data, format, pattern, varnames, testlength = 10)
data |
Data frame in which to search for date variables. |
format |
passed to as.Date |
pattern |
match date variables |
varnames |
variable names |
testlength |
how many rows of data should be evaluated to guess the format. |
The code needs to be copy-and-pasted from the R-output buffer into the R-code buffer. This can be customized for the really efficiently working people, e.g., in emacs.
R-code one line for each variable.
Thomas Alexander Gerds
d <- data.frame(x0="190101",x1=c("12/8/2019"),x2="12-8-2019",x3="20190812",stringsAsFactors=FALSE) lazyDateCoding(d,pattern="x") lazyDateCoding(d,pattern="3")
d <- data.frame(x0="190101",x1=c("12/8/2019"),x2="12-8-2019",x3="20190812",stringsAsFactors=FALSE) lazyDateCoding(d,pattern="x") lazyDateCoding(d,pattern="3")
This function eases the process of generating factor variables with relevant labels. All variables in a data.frame with less than a user set number of levels result in a line which suggests levels and labels. The result can then be modified for use.
lazyFactorCoding(data, max.levels = 10)
lazyFactorCoding(data, max.levels = 10)
data |
Data frame in which to search for categorical variables. |
max.levels |
Treat non-factor variables only if the number of unique values less than max.levels. Defaults to 10. |
The code needs to be copy-and-pasted from the R-output buffer into the R-code buffer. This can be customized for the really efficiently working people e.g. in emacs.
R-code one line for each variable.
Thomas Alexander Gerds
data(Diabetes) lazyFactorCoding(Diabetes)
data(Diabetes) lazyFactorCoding(Diabetes)
Wrapper for publish(...,org=TRUE)
org(x, ...)
org(x, ...)
x |
object to format as org |
... |
passed to publish |
See publish
Thomas Alexander Gerds
Parse interaction terms for regression tables
parseInteractionTerms( terms, xlevels, units, format.factor, format.contrast, format.scale, format.scale.unit, sep = ": ", ... )
parseInteractionTerms( terms, xlevels, units, format.factor, format.contrast, format.scale, format.scale.unit, sep = ": ", ... )
terms |
Terms of a formula |
xlevels |
Factor levels corresponding to the variables in
|
units |
named list with unit labels. names should match variable names in formula. |
format.factor |
For categorical variables. A string which specifies the print format for factor labels.
The string has to contain the keywords |
format.contrast |
For categorical variables. A string which specifies the print format for constrast statements.
The string has to contain the keywords |
format.scale |
A string which specifies the print format for continuous variables without units.
The string has to contain the keyword |
format.scale.unit |
A string which specifies the print format for continuous variables with units.
The string has to contain the keywords |
sep |
a character string to separate the terms. Default is |
... |
Not yet used |
Prepare a list of contrasts which combines regression coefficients to describe statistical interactions.
List of contrasts which can be passed to
lava::estimate
.
Thomas A. Gerds <[email protected]>
lava::estimate
tt <- terms(formula(SBP~age+sex*BMI)) xlev <- list(sex=c("male","female"),BMI=c("normal","overweight","obese")) parseInteractionTerms(terms=tt,xlevels=xlev) parseInteractionTerms(terms=tt,xlevels=xlev,format.factor="var level") parseInteractionTerms(terms=tt,xlevels=xlev,format.contrast="var(level:ref)") tt2 <- terms(formula(SBP~age*factor(sex)+BMI)) xlev2 <- list("factor(sex)"=c("male","female")) parseInteractionTerms(terms=tt2,xlevels=xlev2) parseInteractionTerms(terms=tt2,xlevels=xlev2,units=list(age="yrs")) data(Diabetes) fit <- glm(bp.2s~age*factor(gender)+BMI,data=Diabetes) parseInteractionTerms(terms=terms(fit$formula),xlevels=fit$xlevels, format.scale="var -- level:ref",units=list("age"='years')) parseInteractionTerms(terms=terms(fit$formula),xlevels=fit$xlevels, format.scale.unit="var [unit]",units=list("age"='years')) it <- parseInteractionTerms(terms=terms(fit$formula),xlevels=fit$xlevels) ivars <- unlist(lapply(it,function(x)attr(x,"variables"))) lava::estimate(fit,function(p)lapply(unlist(it),eval,envir=sys.parent(-1)))
tt <- terms(formula(SBP~age+sex*BMI)) xlev <- list(sex=c("male","female"),BMI=c("normal","overweight","obese")) parseInteractionTerms(terms=tt,xlevels=xlev) parseInteractionTerms(terms=tt,xlevels=xlev,format.factor="var level") parseInteractionTerms(terms=tt,xlevels=xlev,format.contrast="var(level:ref)") tt2 <- terms(formula(SBP~age*factor(sex)+BMI)) xlev2 <- list("factor(sex)"=c("male","female")) parseInteractionTerms(terms=tt2,xlevels=xlev2) parseInteractionTerms(terms=tt2,xlevels=xlev2,units=list(age="yrs")) data(Diabetes) fit <- glm(bp.2s~age*factor(gender)+BMI,data=Diabetes) parseInteractionTerms(terms=terms(fit$formula),xlevels=fit$xlevels, format.scale="var -- level:ref",units=list("age"='years')) parseInteractionTerms(terms=terms(fit$formula),xlevels=fit$xlevels, format.scale.unit="var [unit]",units=list("age"='years')) it <- parseInteractionTerms(terms=terms(fit$formula),xlevels=fit$xlevels) ivars <- unlist(lapply(it,function(x)attr(x,"variables"))) lava::estimate(fit,function(p)lapply(unlist(it),eval,envir=sys.parent(-1)))
Function to plot confidence intervals
## S3 method for class 'ci' plot(x, xlim, xlab = "", labels, ...)
## S3 method for class 'ci' plot(x, xlim, xlab = "", labels, ...)
x |
List, data.frame or other object of this form containing point estimates (first element) and the corresponding confidence intervals as elements lower and upper. |
xlim |
Limit of the x-axis |
xlab |
Label for the y-axis |
labels |
labels |
... |
Used to transport arguments to |
Function to plot means and other point estimates with confidence intervals
Thomas A. Gerds <[email protected]>
data(Diabetes) x=ci.mean(bp.2s~AgeGroups,data=Diabetes) plot(x,title.labels="Age groups",xratio=c(0.4,0.3)) x=ci.mean(bp.2s/500~AgeGroups+gender,data=Diabetes) plot(x,xratio=c(0.4,0.2)) plot(x,xratio=c(0.4,0.2), labels=split(x$labels[,"AgeGroups"],x$labels[,"gender"]), title.labels="Age groups") ## Not run: plot(x, leftmargin=0, rightmargin=0) plotConfidence(x, leftmargin=0, rightmargin=0) data(CiTable) with(CiTable,plotConfidence(x=list(HazardRatio), lower=lower, upper=upper, labels=CiTable[,2:6], factor.reference.pos=c(1,10,19), format="(u-l)", points.col="blue", digits=2)) with(CiTable,Publish::plot.ci(x=list(HazardRatio), lower=lower, upper=upper, labels=CiTable[,2:6], factor.reference.pos=c(1,10,19), format="(u-l)", points.col="blue", digits=2, leftmargin=-2, title.labels.cex=1.1, labels.cex=0.8,values.cex=0.8)) ## End(Not run)
data(Diabetes) x=ci.mean(bp.2s~AgeGroups,data=Diabetes) plot(x,title.labels="Age groups",xratio=c(0.4,0.3)) x=ci.mean(bp.2s/500~AgeGroups+gender,data=Diabetes) plot(x,xratio=c(0.4,0.2)) plot(x,xratio=c(0.4,0.2), labels=split(x$labels[,"AgeGroups"],x$labels[,"gender"]), title.labels="Age groups") ## Not run: plot(x, leftmargin=0, rightmargin=0) plotConfidence(x, leftmargin=0, rightmargin=0) data(CiTable) with(CiTable,plotConfidence(x=list(HazardRatio), lower=lower, upper=upper, labels=CiTable[,2:6], factor.reference.pos=c(1,10,19), format="(u-l)", points.col="blue", digits=2)) with(CiTable,Publish::plot.ci(x=list(HazardRatio), lower=lower, upper=upper, labels=CiTable[,2:6], factor.reference.pos=c(1,10,19), format="(u-l)", points.col="blue", digits=2, leftmargin=-2, title.labels.cex=1.1, labels.cex=0.8,values.cex=0.8)) ## End(Not run)
Plotting regression coefficients with confidence limits
## S3 method for class 'regressionTable' plot(x, xlim, xlab, style = 1, ...)
## S3 method for class 'regressionTable' plot(x, xlim, xlab, style = 1, ...)
x |
regression table obtained with regressionTable |
xlim |
Limits for x-axis |
xlab |
Label for x-axis |
style |
Determines how to arrange variable names and their corresponding units |
... |
passed to plotConfidence |
Thomas A. Gerds <[email protected]>
regressionTable
## linear regression data(Diabetes) f <- glm(bp.1s~AgeGroups+chol+gender+location,data=Diabetes) rtf <- regressionTable(f,factor.reference = "inline") plot(rtf,cex=1.3) ## logistic regression data(Diabetes) f <- glm(I(BMI>25)~bp.1s+AgeGroups+chol+gender+location,data=Diabetes,family=binomial) rtf <- regressionTable(f,factor.reference = "inline") plot(rtf,cex=1.3) ## Poisson regression data(trace) fit <- glm(dead ~ smoking+ sex+ age+Time+offset(log(ObsTime)), family = poisson,data=trace) rtab <- regressionTable(fit,factor.reference = "inline") plot(rtab,xlim=c(0.85,1.15),cex=1.8,xaxis.cex=1.5) ## Cox regression library(survival) data(pbc) coxfit <- coxph(Surv(time,status!=0)~age+log(bili)+log(albumin)+factor(edema)+sex,data=pbc) pubcox <- publish(coxfit) plot(pubcox,cex=1.5,xratio=c(0.4,0.2))
## linear regression data(Diabetes) f <- glm(bp.1s~AgeGroups+chol+gender+location,data=Diabetes) rtf <- regressionTable(f,factor.reference = "inline") plot(rtf,cex=1.3) ## logistic regression data(Diabetes) f <- glm(I(BMI>25)~bp.1s+AgeGroups+chol+gender+location,data=Diabetes,family=binomial) rtf <- regressionTable(f,factor.reference = "inline") plot(rtf,cex=1.3) ## Poisson regression data(trace) fit <- glm(dead ~ smoking+ sex+ age+Time+offset(log(ObsTime)), family = poisson,data=trace) rtab <- regressionTable(fit,factor.reference = "inline") plot(rtab,xlim=c(0.85,1.15),cex=1.8,xaxis.cex=1.5) ## Cox regression library(survival) data(pbc) coxfit <- coxph(Surv(time,status!=0)~age+log(bili)+log(albumin)+factor(edema)+sex,data=pbc) pubcox <- publish(coxfit) plot(pubcox,cex=1.5,xratio=c(0.4,0.2))
This function operates on a "subgroupAnalysis" object to produce a formatted table and a forest plot
## S3 method for class 'subgroupAnalysis' plot(x, ...)
## S3 method for class 'subgroupAnalysis' plot(x, ...)
x |
- a subgroupAnalysis object |
... |
- passed on to plotConfidence |
This function produces a formatted table of a subgroupAnalysis object and adds a forest plot. If further details needs attention before plotting is is advisable use adjust the table produced by the summary function and then plotting with the plotConfidence function
Christian Torp-Pedersen
subgroupAnalysis, plotConfidence
#load libraries library(Publish) library(survival) library(data.table) data(traceR) #get dataframe traceR setDT(traceR) traceR[,':='(wmi2=factor(wallMotionIndex<0.9,levels=c(TRUE,FALSE), labels=c("bad","good")), abd2=factor(abdominalCircumference<95, levels=c(TRUE,FALSE), labels=c("slim","fat")), sex=factor(sex))] fit_cox <- coxph(Surv(observationTime,dead)~treatment,data=traceR) # Selected subgroups - univariable analysis sub_cox <- subgroupAnalysis(fit_cox,traceR,treatment="treatment", subgroup=c("smoking","sex","wmi2","abd2")) # subgroups as character string plot(sub_cox)
#load libraries library(Publish) library(survival) library(data.table) data(traceR) #get dataframe traceR setDT(traceR) traceR[,':='(wmi2=factor(wallMotionIndex<0.9,levels=c(TRUE,FALSE), labels=c("bad","good")), abd2=factor(abdominalCircumference<95, levels=c(TRUE,FALSE), labels=c("slim","fat")), sex=factor(sex))] fit_cox <- coxph(Surv(observationTime,dead)~treatment,data=traceR) # Selected subgroups - univariable analysis sub_cox <- subgroupAnalysis(fit_cox,traceR,treatment="treatment", subgroup=c("smoking","sex","wmi2","abd2")) # subgroups as character string plot(sub_cox)
Function to plot confidence intervals with their values and additional labels. One anticipated use of this function involves first the generation of a regression object, then arrangement of a result table with "regressionTable", further arrangment of table with with e.g. "fixRegressionTable" and various user defined changes - and then finally table along with forest plot using the current function.
plotConfidence( x, y.at, lower, upper, pch = 16, cex = 1, lwd = 1, col = 4, xlim, xlab, labels, title.labels, values, title.values, section.pos, section.sep, section.title = NULL, section.title.x, section.title.offset, order, leftmargin = 0.025, rightmargin = 0.025, stripes, factor.reference.pos, factor.reference.label = "Reference", factor.reference.pch = 16, refline = 1, title.line = TRUE, xratio, y.offset = 0, y.title.offset, digits = 2, format, extremearrows.length = 0.05, extremearrows.angle = 30, add = FALSE, layout = TRUE, xaxis = TRUE, ... )
plotConfidence( x, y.at, lower, upper, pch = 16, cex = 1, lwd = 1, col = 4, xlim, xlab, labels, title.labels, values, title.values, section.pos, section.sep, section.title = NULL, section.title.x, section.title.offset, order, leftmargin = 0.025, rightmargin = 0.025, stripes, factor.reference.pos, factor.reference.label = "Reference", factor.reference.pch = 16, refline = 1, title.line = TRUE, xratio, y.offset = 0, y.title.offset, digits = 2, format, extremearrows.length = 0.05, extremearrows.angle = 30, add = FALSE, layout = TRUE, xaxis = TRUE, ... )
x |
Either a vector containing the point estimates or a list whose first element contains the point estimates. Further list elements can contain the confidence intervals and labels. In this case the list needs to have names 'lower' and 'upper' to indicate the values of the lower and the upper limits of the confidence intervals, respectively, and may have an element 'labels' which is a vector or matrix or list with labels. |
y.at |
Optional vector of y-position for the confidence intervals and corresponding values and labels. |
lower |
Lower confidence limits. Used if object |
upper |
Upper confidence limits. Used if object |
pch |
Symbol for points. |
cex |
Defaults size of all figures and plotting symbol.
Single elements are controlled separately. See |
lwd |
Default width of all lines Single elements are
controlled separately. See |
col |
Default colour of confidence intervals. |
xlim |
Plotting limits for the confidence intervals. See also
|
xlab |
Label for the x-axis. |
labels |
Vector or matrix or list with |
title.labels |
Main title for the column which shows the |
values |
Either logical or vector, matrix or list with
values. If |
title.values |
Main title for the column |
section.pos |
Vector with y-axis posititions for section.titles. |
section.sep |
Amount of space between paragraphs (applies only if |
section.title |
Intermediate section headings. |
section.title.x |
x-position for section.titles |
section.title.offset |
Y-offset for section.titles |
order |
Order of the three columns: labels, confidence limits, values. See examples. |
leftmargin |
Percentage of plotting region used for leftmargin. Default is 0.025. See also Details. |
rightmargin |
Percentage of plotting region used for rightmargin. Default is 0.025. See also Details. |
stripes |
Vector of up to three Logicals. If |
factor.reference.pos |
Position at which factors attain reference values. |
factor.reference.label |
Label to use at
|
factor.reference.pch |
Plotting symbol to use at
|
refline |
Position of a vertical line to indicate the null hypothesis. Default is 1 which would work for odds ratios and hazard ratios. |
title.line |
Position of a horizontal line to separate the title line from the plot |
xratio |
One or two values between 0 and 1 which determine how to split the plot window in horizontal x-direction. If there are two columns (labels, CI) or (CI, values) only one value is used and the default is 0.618 (goldener schnitt) which gives the graphical presentation of the confidence intervals 38.2 graph. The remaining 61.8 If there are three columns (labels, CI, values), xratio has two values which default to fractions of 0.7 according to the relative widths of labels and values, thus by default only 0.3 are used for the graphical presentation of the confidence intervals. The remaining 30 confidence intervals. See examles. |
y.offset |
Either a single value or a vector determining the vertical offset of all rows. If it is a single value all rows are shifted up (or down if negative) by this value. This can be used to add a second set of confidence intervals to an existing graph or to achieve a visual grouping of rows that belong together. See examples. |
y.title.offset |
Numeric value by which to vertically shift the titles of the labels and values. |
digits |
Number of digits, passed to |
format |
Format for constructing values of confidence intervals. Defaults to '(u;l)' if there are negative lower or upper values and to '(u-l)' otherwise. |
extremearrows.length |
Length of the arrows in case of confidence intervals that stretch beyond xlim. |
extremearrows.angle |
Angle of the arrows in case of confidence intervals that stretch beyond xlim. |
add |
Logical. If |
layout |
Logical. If |
xaxis |
Logical. If |
... |
Used to control arguments of the following subroutines:
|
Function to plot means and other point estimates with confidence intervals, their values and additional labels . Horizonal margins as determined by par()$mar are ignored. Instead layout is used to divide the plotting region horizontally into two or three parts plus leftmargin and rightmargin.
When values is FALSE there are only two parts. The default order is labels on the left confidence intervals on the right. When no labels are given or labels is FALSE there are only two parts. The default order is confidence intervals on the left values on the right.
The default order of three parts from left to right is labels, confidence intervals, values. The order can be changed as shown by the examples below. The relative widths of the two or three parts need to be adapted to the actual size of the text of the labels. This depends on the plotting device and the size of the font and figures and thus has to be adjusted manually.
Oma can be used to further control horizontal margins, e.g., par(oma=c(0,4,0,4)).
If confidence limits extend beyond the range determined by xlim, then arrows are drawn at the x-lim borders to indicate that the confidence limits continue.
List of dimensions and coordinates
Thomas A. Gerds <[email protected]>
library(Publish) data(CiTable) ## A first draft version of the plot is obtained as follows plotConfidence(x=CiTable[,c("HazardRatio","lower","upper","p")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")]) ## if argument labels is a named list the table is subdivided: labellist <- split(CiTable[,c("Dose","Time","Mean","SD","n")],CiTable[,"Drug"]) labellist ## the data need to be ordered accordingly CC= data.table::rbindlist(split(CiTable[,c("HazardRatio","lower","upper")],CiTable[,"Drug"])) plotConfidence(x=CC, labels=labellist) ## The graph consist of at most three columns: ## ## column 1: labels ## column 2: printed values of the confidence intervals ## column 3: graphical presentation of the confidence intervals ## ## NOTE: column 3 appears always, the user decides if also ## column 1, 2 should appear ## ## The columns are arranged with the function layout ## and the default order is 1,3,2 such that the graphical ## display of the confidence intervals appears in the middle ## ## the order of appearance of the three columns can be changed as follows plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], order=c(1,3,2)) plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], order=c(2,3,1)) ## if there are only two columns the order is 1, 2 plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], values=FALSE, order=c(2,1)) plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], values=FALSE, order=c(1,2)) ## The relative size of the columns needs to be controlled manually ## by using the argument xratio. If there are only two columns plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], xratio=c(0.4,0.15)) ## The amount of space on the left and right margin can be controlled ## as follows: plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], xratio=c(0.4,0.15), leftmargin=0.1,rightmargin=0.00) ## The actual size of the current graphics device determines ## the size of the figures and the space between them. ## The sizes and line widths are increased as follows: plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], xlab="Hazard ratio", labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], points.cex=3, cex=2, lwd=3, xaxis.lwd=1.3, xaxis.cex=1.3) ## Note that 'cex' of axis ticks is controlled via 'par' but ## cex of the label via argument 'cex' of 'mtext'. ## The sizes and line widths are decreased as follows: plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], cex=0.8, lwd=0.8, xaxis.lwd=0.8, xaxis.cex=0.8) ## Another good news is that all figures can be controlled separately ## The size of the graphic device can be controlled in the usual way, e.g.: ## Not run: pdf("~/tmp/testCI.pdf",width=8,height=8) plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")]) dev.off() ## End(Not run) ## More control of the x-axis and confidence intervals that ## stretch outside the x-range end in an arrow. ## the argument xlab.line adjusts the distance of the x-axis ## label from the graph plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], xlab="Hazard ratio", xlab.line=1.8, xaxis.at=c(0.8,1,1.3), labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], xlim=c(0.8,1.3)) ## log-scale plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], xlab="Hazard ratio", xlab.line=1.8, xaxis.at=c(0.8,1,1.3), labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], xlim=c(0.8,1.3),plot.log="x") ## More pronounced arrows ## Coloured xlab expression plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], xlab=expression(HR[1](s)), xlab.line=1.8, xlab.col="darkred", extremearrows.angle=50, extremearrows.length=0.1, labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], xlim=c(0.8,1.3)) ## Controlling the labels and their titles ## and the values and their titles plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], xlab="Hazard ratio", title.values=expression(bold(HR (CI[95]))), title.labels=c("Drug/Time","Dose","Mean","St.dev.","N"), factor.reference.pos=c(1,10,19), factor.reference.pch=16, cex=1.3, xaxis.at=c(0.75,1,1.25,1.5,2)) ## For factor reference groups, one may want to replace the ## confidence intervals by the word Reference, as in the previous example. ## To change the word 'Reference' we use the argument factor.reference.label: ## To change the plot symbol for the reference lines factor.reference.pch ## To remove the plot symbol in the reference lines use 'NA' as follows: plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], xlab="Hazard ratio", factor.reference.label="Ref", title.values=expression(bold(HR (CI[95]))), title.labels=c("Drug/Time","Dose","Mean","St.dev.","N"), factor.reference.pos=c(1,10,19), factor.reference.pch=NA, cex=1.3, xaxis.at=c(0.75,1,1.25,1.5,2)) ## changing the style of the graphical confidence intervals plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], xlab="Hazard ratio", factor.reference.pos=c(1,10,19), points.pch=15, points.col=rainbow(27), points.cex=2, arrows.col="darkblue", cex=1.3, order=c(1,3,2), xaxis.at=c(0.75,1,1.25,1.5)) ## the values column of the graph can have multiple columns as well ## to illustrate this we create the confidence intervals ## before calling the function and then cbind them ## to the pvalues HR <- pubformat(CiTable[,6]) CI95 <- formatCI(lower=CiTable[,7],upper=CiTable[,8],format="(l-u)") pval <- format.pval(CiTable[,9],digits=3,eps=10^{-3}) pval[pval=="NA"] <- "" plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], values=list("HR"=HR,"CI-95"=CI95,"P-value"=pval), cex=1.2, xratio=c(0.5,0.3)) ## Finally, vertical columns can be delimited with background color ## NOTE: this may slow things down and potentially create ## large figures (many bytes) col1 <- rep(c(prodlim::dimColor("green",density=22), prodlim::dimColor("green")),length.out=9) col2 <- rep(c(prodlim::dimColor("orange",density=22), prodlim::dimColor("orange")),length.out=9) col3 <- rep(c(prodlim::dimColor("blue",density=22), prodlim::dimColor("blue")),length.out=9) plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], stripes=c(1,0,1), stripes.col=c(col1,col2,col3)) plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], stripes=c(1,1,1), stripes.col=c(col1,col2,col3)) threegreens <- c(prodlim::dimColor("green",density=55), prodlim::dimColor("green",density=33), prodlim::dimColor("green",density=22)) plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], values=FALSE, xlim=c(0.75,1.5), stripes=c(1,1,1), xratio=c(0.5,0.15), stripes.horizontal=c(0,9,18,27)+0.5, stripes.col=threegreens) # combining multiple plots into one layout(t(matrix(1:5))) plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Mean","n")], layout=FALSE) plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], layout=FALSE)
library(Publish) data(CiTable) ## A first draft version of the plot is obtained as follows plotConfidence(x=CiTable[,c("HazardRatio","lower","upper","p")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")]) ## if argument labels is a named list the table is subdivided: labellist <- split(CiTable[,c("Dose","Time","Mean","SD","n")],CiTable[,"Drug"]) labellist ## the data need to be ordered accordingly CC= data.table::rbindlist(split(CiTable[,c("HazardRatio","lower","upper")],CiTable[,"Drug"])) plotConfidence(x=CC, labels=labellist) ## The graph consist of at most three columns: ## ## column 1: labels ## column 2: printed values of the confidence intervals ## column 3: graphical presentation of the confidence intervals ## ## NOTE: column 3 appears always, the user decides if also ## column 1, 2 should appear ## ## The columns are arranged with the function layout ## and the default order is 1,3,2 such that the graphical ## display of the confidence intervals appears in the middle ## ## the order of appearance of the three columns can be changed as follows plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], order=c(1,3,2)) plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], order=c(2,3,1)) ## if there are only two columns the order is 1, 2 plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], values=FALSE, order=c(2,1)) plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], values=FALSE, order=c(1,2)) ## The relative size of the columns needs to be controlled manually ## by using the argument xratio. If there are only two columns plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], xratio=c(0.4,0.15)) ## The amount of space on the left and right margin can be controlled ## as follows: plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], xratio=c(0.4,0.15), leftmargin=0.1,rightmargin=0.00) ## The actual size of the current graphics device determines ## the size of the figures and the space between them. ## The sizes and line widths are increased as follows: plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], xlab="Hazard ratio", labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], points.cex=3, cex=2, lwd=3, xaxis.lwd=1.3, xaxis.cex=1.3) ## Note that 'cex' of axis ticks is controlled via 'par' but ## cex of the label via argument 'cex' of 'mtext'. ## The sizes and line widths are decreased as follows: plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], cex=0.8, lwd=0.8, xaxis.lwd=0.8, xaxis.cex=0.8) ## Another good news is that all figures can be controlled separately ## The size of the graphic device can be controlled in the usual way, e.g.: ## Not run: pdf("~/tmp/testCI.pdf",width=8,height=8) plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")]) dev.off() ## End(Not run) ## More control of the x-axis and confidence intervals that ## stretch outside the x-range end in an arrow. ## the argument xlab.line adjusts the distance of the x-axis ## label from the graph plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], xlab="Hazard ratio", xlab.line=1.8, xaxis.at=c(0.8,1,1.3), labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], xlim=c(0.8,1.3)) ## log-scale plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], xlab="Hazard ratio", xlab.line=1.8, xaxis.at=c(0.8,1,1.3), labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], xlim=c(0.8,1.3),plot.log="x") ## More pronounced arrows ## Coloured xlab expression plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], xlab=expression(HR[1](s)), xlab.line=1.8, xlab.col="darkred", extremearrows.angle=50, extremearrows.length=0.1, labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], xlim=c(0.8,1.3)) ## Controlling the labels and their titles ## and the values and their titles plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], xlab="Hazard ratio", title.values=expression(bold(HR (CI[95]))), title.labels=c("Drug/Time","Dose","Mean","St.dev.","N"), factor.reference.pos=c(1,10,19), factor.reference.pch=16, cex=1.3, xaxis.at=c(0.75,1,1.25,1.5,2)) ## For factor reference groups, one may want to replace the ## confidence intervals by the word Reference, as in the previous example. ## To change the word 'Reference' we use the argument factor.reference.label: ## To change the plot symbol for the reference lines factor.reference.pch ## To remove the plot symbol in the reference lines use 'NA' as follows: plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], xlab="Hazard ratio", factor.reference.label="Ref", title.values=expression(bold(HR (CI[95]))), title.labels=c("Drug/Time","Dose","Mean","St.dev.","N"), factor.reference.pos=c(1,10,19), factor.reference.pch=NA, cex=1.3, xaxis.at=c(0.75,1,1.25,1.5,2)) ## changing the style of the graphical confidence intervals plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], xlab="Hazard ratio", factor.reference.pos=c(1,10,19), points.pch=15, points.col=rainbow(27), points.cex=2, arrows.col="darkblue", cex=1.3, order=c(1,3,2), xaxis.at=c(0.75,1,1.25,1.5)) ## the values column of the graph can have multiple columns as well ## to illustrate this we create the confidence intervals ## before calling the function and then cbind them ## to the pvalues HR <- pubformat(CiTable[,6]) CI95 <- formatCI(lower=CiTable[,7],upper=CiTable[,8],format="(l-u)") pval <- format.pval(CiTable[,9],digits=3,eps=10^{-3}) pval[pval=="NA"] <- "" plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], values=list("HR"=HR,"CI-95"=CI95,"P-value"=pval), cex=1.2, xratio=c(0.5,0.3)) ## Finally, vertical columns can be delimited with background color ## NOTE: this may slow things down and potentially create ## large figures (many bytes) col1 <- rep(c(prodlim::dimColor("green",density=22), prodlim::dimColor("green")),length.out=9) col2 <- rep(c(prodlim::dimColor("orange",density=22), prodlim::dimColor("orange")),length.out=9) col3 <- rep(c(prodlim::dimColor("blue",density=22), prodlim::dimColor("blue")),length.out=9) plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], stripes=c(1,0,1), stripes.col=c(col1,col2,col3)) plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], stripes=c(1,1,1), stripes.col=c(col1,col2,col3)) threegreens <- c(prodlim::dimColor("green",density=55), prodlim::dimColor("green",density=33), prodlim::dimColor("green",density=22)) plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Drug.Time","Dose","Mean","SD","n")], values=FALSE, xlim=c(0.75,1.5), stripes=c(1,1,1), xratio=c(0.5,0.15), stripes.horizontal=c(0,9,18,27)+0.5, stripes.col=threegreens) # combining multiple plots into one layout(t(matrix(1:5))) plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], labels=CiTable[,c("Mean","n")], layout=FALSE) plotConfidence(x=CiTable[,c("HazardRatio","lower","upper")], layout=FALSE)
Print confidence intervals
## S3 method for class 'ci' print(x, se = FALSE, print = TRUE, ...)
## S3 method for class 'ci' print(x, se = FALSE, print = TRUE, ...)
x |
Object containing point estimates and the corresponding confidence intervals |
se |
If |
print |
Logical: if |
... |
passed to summary.ci |
This format of the confidence intervals is user-manipulable.
A string: the formatted confidence intervals
Thomas A. Gerds <[email protected]>
ci plot.ci formatCI summary.ci
library(lava) m <- lvm(Y~X) m <- categorical(m,Y~X,K=4) set.seed(4) d <- sim(m,24) ci.mean(Y~X,data=d) x <- ci.mean(Y~X,data=d) print(x,format="(l,u)")
library(lava) m <- lvm(Y~X) m <- categorical(m,Y~X,K=4) set.seed(4) d <- sim(m,24) ci.mean(Y~X,data=d) x <- ci.mean(Y~X,data=d) print(x,format="(l,u)")
Print function for subgroupAnalysis
## S3 method for class 'subgroupAnalysis' print(x, ...)
## S3 method for class 'subgroupAnalysis' print(x, ...)
x |
- An object obtained with |
... |
Passed to summary.subgroupAnalysis |
This function is simply calling summary.subgroupAnalysis
The result of summary.subgroupAnalysis(x)
Christian Torp-Pedersen ([email protected])
subgroupAnalysis
print results of 2x2 contingency table analysis
## S3 method for class 'table2x2' print(x, digits = 1, ...)
## S3 method for class 'table2x2' print(x, digits = 1, ...)
x |
object obtained with table2x2 |
digits |
rounding digits |
... |
not used |
invisible x
Thomas A. Gerds <[email protected]>
table2x2
table2x2(table("marker"=rbinom(100,1,0.4),"response"=rbinom(100,1,0.1))) table2x2(matrix(c(71,18,38,8),ncol=2),stats="table") table2x2(matrix(c(71,18,38,8),ncol=2),stats=c("rr","fisher"))
table2x2(table("marker"=rbinom(100,1,0.4),"response"=rbinom(100,1,0.1))) table2x2(matrix(c(71,18,38,8),ncol=2),stats="table") table2x2(matrix(c(71,18,38,8),ncol=2),stats=c("rr","fisher"))
Print function for univariate tables
## S3 method for class 'univariateTable' print(x, ...)
## S3 method for class 'univariateTable' print(x, ...)
x |
An object obtained with |
... |
Passed to summary.univariateTable |
This function is simply calling summary.univariateTable
The result of summary.univariateTable(x)
Thomas A. Gerds <[email protected]>
univariateTable
Format numbers according to a specified handler function. Currently supported are sprintf, format and prettyNum.
pubformat(x, digits = 2, nsmall = digits, handler = "sprintf", ...)
pubformat(x, digits = 2, nsmall = digits, handler = "sprintf", ...)
x |
numeric vector |
digits |
number of digits |
nsmall |
see handler |
handler |
String specififying the name of the function which should
perform the formatting. See |
... |
Passed to handler function if applicable, i.e., not to |
Formatted number
Thomas A. Gerds <[email protected]>
sprintf
, format
, prettyNum
pubformat(c(0.000143,12.8,1)) pubformat(c(0.000143,12.8,1),handler="format") pubformat(c(0.000143,12.8,1),handler="format",trim=TRUE) pubformat(c(0.000143,12.8,1),handler="prettyNum")
pubformat(c(0.000143,12.8,1)) pubformat(c(0.000143,12.8,1),handler="format") pubformat(c(0.000143,12.8,1),handler="format",trim=TRUE) pubformat(c(0.000143,12.8,1),handler="prettyNum")
Publish provides summary functions for data and results of statistical analysis in ready-for-publication design
publish(object, ...)
publish(object, ...)
object |
object to be published |
... |
Passed to method. |
Some warnings are currently suppressed.
Tables and figures
Thomas A. Gerds <[email protected]>
publish.CauseSpecificCox publish.ci publish.coxph publish.glm publish.riskRegression publish.survdiff
Publish cause-specific Cox models
## S3 method for class 'CauseSpecificCox' publish( object, cause, confint.method, pvalue.method, factor.reference = "extraline", units = NULL, print = TRUE, ... )
## S3 method for class 'CauseSpecificCox' publish( object, cause, confint.method, pvalue.method, factor.reference = "extraline", units = NULL, print = TRUE, ... )
object |
Cause-specific hazard model obtained with
|
cause |
Show a table for this cause. If omitted, list all causes. |
confint.method |
See |
pvalue.method |
See |
factor.reference |
See |
units |
See |
print |
If |
... |
passed on to control formatting of parameters,
confidence intervals and p-values. See
|
The cause-specific hazard ratio's are combined into one table.
Table with cause-specific hazard ratios, confidence limits and p-values.
Thomas Alexander Gerds <[email protected]>
if (requireNamespace("riskRegression",quietly=TRUE)){ library(riskRegression) library(prodlim) library(survival) data(Melanoma,package="riskRegression") fit1 <- CSC(list(Hist(time,status)~sex,Hist(time,status)~invasion+epicel+age), data=Melanoma) publish(fit1) publish(fit1,pvalue.stars=TRUE) publish(fit1,factor.reference="inline",units=list("age"="years")) # wide format (same variables in both Cox regression formula) fit2 <- CSC(Hist(time,status)~invasion+epicel+age, data=Melanoma) publish(fit2) # with p-values x <- publish(fit2,print=FALSE) table <- cbind(x[[1]]$regressionTable, x[[2]]$regressionTable[,-c(1,2)]) }
if (requireNamespace("riskRegression",quietly=TRUE)){ library(riskRegression) library(prodlim) library(survival) data(Melanoma,package="riskRegression") fit1 <- CSC(list(Hist(time,status)~sex,Hist(time,status)~invasion+epicel+age), data=Melanoma) publish(fit1) publish(fit1,pvalue.stars=TRUE) publish(fit1,factor.reference="inline",units=list("age"="years")) # wide format (same variables in both Cox regression formula) fit2 <- CSC(Hist(time,status)~invasion+epicel+age, data=Melanoma) publish(fit2) # with p-values x <- publish(fit2,print=FALSE) table <- cbind(x[[1]]$regressionTable, x[[2]]$regressionTable[,-c(1,2)]) }
Publish tables with confidence intervals
## S3 method for class 'ci' publish(object, format = "[u;l]", se = FALSE, ...)
## S3 method for class 'ci' publish(object, format = "[u;l]", se = FALSE, ...)
object |
Object of class ci containing point estimates and the corresponding confidence intervals |
format |
A string which indicates the format used for
confidence intervals. The string is passed to
|
se |
If |
... |
passed to |
This function calls summary.ci with print=FALSE and then publish
table with confidence intervals
Thomas A. Gerds <[email protected]>
summary.ci
data(Diabetes) publish(ci.mean(chol~location+gender,data=Diabetes),org=TRUE)
data(Diabetes) publish(ci.mean(chol~location+gender,data=Diabetes),org=TRUE)
Tabulize the part of the result of a Cox regression analysis which is commonly shown in publications.
## S3 method for class 'coxph' publish( object, confint.method, pvalue.method, print = TRUE, factor.reference = "extraline", units = NULL, probindex = FALSE, ... )
## S3 method for class 'coxph' publish( object, confint.method, pvalue.method, print = TRUE, factor.reference = "extraline", units = NULL, probindex = FALSE, ... )
object |
A |
confint.method |
See |
pvalue.method |
See |
print |
If |
factor.reference |
See |
units |
See |
probindex |
Logical. If |
... |
passed to |
Transforms the log hazard ratios to hazard ratios and returns them with confidence limits and p-values. If explanatory variables are log transformed or log2 transformed, a scaling factor is multiplied to both the log-hazard ratio and its standard-error.
Table with hazard ratios, confidence intervals and p-values.
Thomas Alexander Gerds
library(survival) data(pbc) pbc$edema <- factor(pbc$edema, levels=c("0","0.5","1"), labels=c("0","0.5","1")) fit = coxph(Surv(time,status!=0)~age+sex+edema+log(bili)+log(albumin), data=na.omit(pbc)) publish(fit) ## forest plot plot(publish(fit),cex=1.3) publish(fit,ci.digits=2,pvalue.eps=0.01,pvalue.digits=2,pvalue.stars=TRUE) publish(fit,ci.digits=2,ci.handler="prettyNum",pvalue.eps=0.01, pvalue.digits=2,pvalue.stars=TRUE) publish(fit, ci.digits=2, ci.handler="sprintf", pvalue.eps=0.01, pvalue.digits=2,pvalue.stars=TRUE, ci.trim=FALSE) fit2 = coxph(Surv(time,status!=0)~age+sex+edema+log(bili,base=2)+log(albumin)+log(protime), data=na.omit(pbc)) publish(fit2) # with cluster variable fit3 = coxph(Surv(time,status!=0)~age+cluster(sex)+edema+log(bili,base=2) +log(albumin)+log(protime), data=na.omit(pbc)) publish(fit3) # with strata and cluster variable fit4 = coxph(Surv(time,status!=0)~age+cluster(sex)+strata(edema)+log(bili,base=2) +log(albumin)+log(protime), data=pbc) publish(fit4)
library(survival) data(pbc) pbc$edema <- factor(pbc$edema, levels=c("0","0.5","1"), labels=c("0","0.5","1")) fit = coxph(Surv(time,status!=0)~age+sex+edema+log(bili)+log(albumin), data=na.omit(pbc)) publish(fit) ## forest plot plot(publish(fit),cex=1.3) publish(fit,ci.digits=2,pvalue.eps=0.01,pvalue.digits=2,pvalue.stars=TRUE) publish(fit,ci.digits=2,ci.handler="prettyNum",pvalue.eps=0.01, pvalue.digits=2,pvalue.stars=TRUE) publish(fit, ci.digits=2, ci.handler="sprintf", pvalue.eps=0.01, pvalue.digits=2,pvalue.stars=TRUE, ci.trim=FALSE) fit2 = coxph(Surv(time,status!=0)~age+sex+edema+log(bili,base=2)+log(albumin)+log(protime), data=na.omit(pbc)) publish(fit2) # with cluster variable fit3 = coxph(Surv(time,status!=0)~age+cluster(sex)+edema+log(bili,base=2) +log(albumin)+log(protime), data=na.omit(pbc)) publish(fit3) # with strata and cluster variable fit4 = coxph(Surv(time,status!=0)~age+cluster(sex)+strata(edema)+log(bili,base=2) +log(albumin)+log(protime), data=pbc) publish(fit4)
Tabulate the results of a generalized linear regression analysis.
## S3 method for class 'glm' publish( object, confint.method, pvalue.method, digits = c(2, 4), print = TRUE, factor.reference = "extraline", intercept = ifelse((is.null(object$family) || object$family$family == "gaussian"), 1L, 0L), units = NULL, ... )
## S3 method for class 'glm' publish( object, confint.method, pvalue.method, digits = c(2, 4), print = TRUE, factor.reference = "extraline", intercept = ifelse((is.null(object$family) || object$family$family == "gaussian"), 1L, 0L), units = NULL, ... )
object |
A |
confint.method |
See |
pvalue.method |
See |
digits |
A vector of two integer values. These determine how to round
numbers (first value) and p-values (second value). E.g., c(1,3) would
mean 1 digit for all numbers and 3 digits for p-values.
The actual rounding is done by |
print |
If |
factor.reference |
Style for showing results for categorical. See |
intercept |
See |
units |
See |
... |
passed to |
reference |
Style for showing results for categorical
variables. If |
The table shows changes in mean for linear regression and odds ratios for logistic regression (family = binomial).
Table with regression coefficients, confidence intervals and p-values.
Thomas Alexander Gerds <[email protected]>
data(Diabetes) ## Linear regression f = glm(bp.2s~frame+gender+age,data=Diabetes) publish(f) publish(f,factor.reference="inline") publish(f,pvalue.stars=TRUE) publish(f,ci.format="(l,u)") ### interaction fit = glm(bp.2s~frame+gender*age,data=Diabetes) summary(fit) publish(fit) Fit = glm(bp.2s~frame*gender+age,data=Diabetes) publish(Fit) ## Logistic regression Diabetes$hyper1 <- factor(1*(Diabetes$bp.1s>140)) lrfit <- glm(hyper1~frame+gender+age,data=Diabetes,family=binomial) publish(lrfit) ### interaction lrfit1 <- glm(hyper1~frame+gender*age,data=Diabetes,family=binomial) publish(lrfit1) lrfit2 <- glm(hyper1~frame*gender+age,data=Diabetes,family=binomial) publish(lrfit2) ## Poisson regression data(trace) trace <- Units(trace,list("age"="years")) fit <- glm(dead ~ smoking+sex+age+Time+offset(log(ObsTime)), family="poisson",data=trace) rtf <- regressionTable(fit,factor.reference = "inline") summary(rtf) publish(fit) ## gls regression if (requireNamespace("nlme",quietly=TRUE)){ requireNamespace("lava",quietly=TRUE) library(lava) library(nlme) m <- lvm(Y ~ X1 + gender + group + Interaction) distribution(m, ~gender) <- binomial.lvm() distribution(m, ~group) <- binomial.lvm(size = 2) constrain(m, Interaction ~ gender + group) <- function(x){x[,1]*x[,2]} d <- sim(m, 1e2) d$gender <- factor(d$gender, labels = letters[1:2]) d$group <- factor(d$group) e.gls <- gls(Y ~ X1 + gender*group, data = d, weights = varIdent(form = ~1|group)) publish(e.gls) ## lme fm1 <- lme(distance ~ age*Sex, random = ~1|Subject, data = Orthodont) res <- publish(fm1) }
data(Diabetes) ## Linear regression f = glm(bp.2s~frame+gender+age,data=Diabetes) publish(f) publish(f,factor.reference="inline") publish(f,pvalue.stars=TRUE) publish(f,ci.format="(l,u)") ### interaction fit = glm(bp.2s~frame+gender*age,data=Diabetes) summary(fit) publish(fit) Fit = glm(bp.2s~frame*gender+age,data=Diabetes) publish(Fit) ## Logistic regression Diabetes$hyper1 <- factor(1*(Diabetes$bp.1s>140)) lrfit <- glm(hyper1~frame+gender+age,data=Diabetes,family=binomial) publish(lrfit) ### interaction lrfit1 <- glm(hyper1~frame+gender*age,data=Diabetes,family=binomial) publish(lrfit1) lrfit2 <- glm(hyper1~frame*gender+age,data=Diabetes,family=binomial) publish(lrfit2) ## Poisson regression data(trace) trace <- Units(trace,list("age"="years")) fit <- glm(dead ~ smoking+sex+age+Time+offset(log(ObsTime)), family="poisson",data=trace) rtf <- regressionTable(fit,factor.reference = "inline") summary(rtf) publish(fit) ## gls regression if (requireNamespace("nlme",quietly=TRUE)){ requireNamespace("lava",quietly=TRUE) library(lava) library(nlme) m <- lvm(Y ~ X1 + gender + group + Interaction) distribution(m, ~gender) <- binomial.lvm() distribution(m, ~group) <- binomial.lvm(size = 2) constrain(m, Interaction ~ gender + group) <- function(x){x[,1]*x[,2]} d <- sim(m, 1e2) d$gender <- factor(d$gender, labels = letters[1:2]) d$group <- factor(d$group) e.gls <- gls(Y ~ X1 + gender*group, data = d, weights = varIdent(form = ~1|group)) publish(e.gls) ## lme fm1 <- lme(distance ~ age*Sex, random = ~1|Subject, data = Orthodont) res <- publish(fm1) }
Pretty printing of test results.
## S3 method for class 'htest' publish(object, title, ...)
## S3 method for class 'htest' publish(object, title, ...)
object |
Result of |
title |
Decoration also used to name output |
... |
Used to transport arguments |
Thomas A. Gerds <[email protected]>
data(Diabetes) publish(t.test(bp.2s~gender,data=Diabetes)) publish(wilcox.test(bp.2s~gender,data=Diabetes)) publish(with(Diabetes,t.test(bp.2s,bp.1s,paired=TRUE))) publish(with(Diabetes,wilcox.test(bp.2s,bp.1s,paired=TRUE)))
data(Diabetes) publish(t.test(bp.2s~gender,data=Diabetes)) publish(wilcox.test(bp.2s~gender,data=Diabetes)) publish(with(Diabetes,t.test(bp.2s,bp.1s,paired=TRUE))) publish(with(Diabetes,wilcox.test(bp.2s,bp.1s,paired=TRUE)))
This is the heart of the Publish package
## S3 method for class 'matrix' publish( object, title, colnames = TRUE, rownames = TRUE, col1name = "", digits = 4, try.convert = TRUE, sep = " ", endhead, endrow, style, inter.lines, latex = FALSE, wiki = FALSE, org = FALSE, markdown = FALSE, tabular = TRUE, latex.table.format = NA, latex.hline = 1, latex.nodollar = FALSE, ... )
## S3 method for class 'matrix' publish( object, title, colnames = TRUE, rownames = TRUE, col1name = "", digits = 4, try.convert = TRUE, sep = " ", endhead, endrow, style, inter.lines, latex = FALSE, wiki = FALSE, org = FALSE, markdown = FALSE, tabular = TRUE, latex.table.format = NA, latex.hline = 1, latex.nodollar = FALSE, ... )
object |
Matrix to be published |
title |
Title for table, only in wiki and muse format |
colnames |
If |
rownames |
If |
col1name |
Name for first column |
digits |
Numbers are rounded according to digits |
try.convert |
Logical. If |
sep |
Field separator when style is |
endhead |
String to be pasted at the end of the first row (header) |
endrow |
String to be pasted at the end of each row |
style |
Table style for export to |
inter.lines |
A named list which contains strings to be
placed between the rows of the table. An element with name
|
latex |
If |
wiki |
If |
org |
If |
markdown |
If |
tabular |
For style |
latex.table.format |
For style |
latex.hline |
For style |
latex.nodollar |
For style |
... |
Used to transport arguments. Currently supports
|
x <- matrix(1:12,ncol=3) publish(x) # rounding the numeric part of data mixtures y <- cbind(matrix(letters[1:12],ncol=3),x,matrix(rnorm(12),ncol=3)) publish(y,digits=1) publish(x,latex=TRUE, inter.lines=list("1"="text between line 1 and line 2", "3"="text between line 3 and line 4"))
x <- matrix(1:12,ncol=3) publish(x) # rounding the numeric part of data mixtures y <- cbind(matrix(letters[1:12],ncol=3),x,matrix(rnorm(12),ncol=3)) publish(y,digits=1) publish(x,latex=TRUE, inter.lines=list("1"="text between line 1 and line 2", "3"="text between line 3 and line 4"))
Regression tables after multiple imputations
## S3 method for class 'MIresult' publish( object, confint.method, pvalue.method, digits = c(2, 4), print = TRUE, factor.reference = "extraline", intercept, units = NULL, fit, data, ... )
## S3 method for class 'MIresult' publish( object, confint.method, pvalue.method, digits = c(2, 4), print = TRUE, factor.reference = "extraline", intercept, units = NULL, fit, data, ... )
object |
Object obtained with mitools::MIcombine based on smcfcs::smcfcs multiple imputation analysis |
confint.method |
No options here. Only Wald type confidence intervals. |
pvalue.method |
No options here. Only Wald type tests. |
digits |
Rounding digits for all numbers but the p-values. |
print |
If |
factor.reference |
Style for showing results for
categorical. See |
intercept |
See |
units |
See |
fit |
One fitted model using the same formula as
|
data |
Original data set which includes the missing values |
... |
passed to summary.regressionTable, labelUnits and publish.default. |
Show results of smcfcs based multiple imputations of missing covariates in publishable format
Thomas A. Gerds <[email protected]>
## Not run: if (requireNamespace("riskRegression",quietly=TRUE) & requireNamespace("mitools",quietly=TRUE) & requireNamespace("smcfcs",quietly=TRUE)){ library(riskRegression) library(mitools) library(smcfcs) ## continuous outcome: linear regression # lava some data with missing values set.seed(7) d=sampleData(78) ## generate missing values d[X1==1,X6:=NA] d[X2==1,X3:=NA] d=d[,.(X8,X4,X3,X6,X7)] sapply(d,function(x)sum(is.na(x))) # multiple imputation (should set m to a large value) set.seed(17) f= smcfcs(d,smtype="lm", smformula=X8~X4+X3+X6+X7, method=c("","","logreg","norm",""),m=3) ccfit=lm(X8~X4+X3+X6+X7,data=d) mifit=MIcombine(with(imputationList(f$impDatasets), lm(X8~X4+X3+X6+X7))) publish(mifit,fit=ccfit,data=d) publish(ccfit) ## binary outcome # lava some data with missing values set.seed(7) db=sampleData(78,outcome="binary") ## generate missing values db[X1==1,X6:=NA] db[X2==1,X3:=NA] db=db[,.(Y,X4,X3,X6,X7)] sapply(db,function(x)sum(is.na(x))) # multiple imputation (should set m to a large value) set.seed(17) fb= smcfcs(db,smtype="logistic", smformula=Y~X4+X3+X6+X7, method=c("","","logreg","norm",""),m=2) ccfit=glm(Y~X4+X3+X6+X7,family="binomial",data=db) mifit=MIcombine(with(imputationList(fb$impDatasets), glm(Y~X4+X3+X6+X7,family="binomial"))) publish(mifit,fit=ccfit) publish(ccfit) ## survival: Cox regression library(survival) # lava some data with missing values set.seed(7) ds=sampleData(78,outcome="survival") ## generate missing values ds[X5==1,X6:=NA] ds[X2==1,X3:=NA] ds=ds[,.(time,event,X4,X3,X6,X7)] sapply(ds,function(x)sum(is.na(x))) set.seed(17) fs= smcfcs(ds,smtype="coxph", smformula="Surv(time,event)~X4+X3+X6+X7", method=c("","","","logreg","norm",""),m=2) ccfit=coxph(Surv(time,event)~X4+X3+X6+X7,data=ds) mifit=MIcombine(with(imputationList(fs$impDatasets), coxph(Surv(time,event)~X4+X3+X6+X7))) publish(mifit,fit=ccfit,data=ds) publish(ccfit) ## competing risks: Cause-specific Cox regression library(survival) # lava some data with missing values set.seed(7) dcr=sampleData(78,outcome="competing.risks") ## generate missing values dcr[X5==1,X6:=NA] dcr[X2==1,X3:=NA] dcr=dcr[,.(time,event,X4,X3,X6,X7)] sapply(dcr,function(x)sum(is.na(x))) set.seed(17) fcr= smcfcs(dcr,smtype="compet", smformula=c("Surv(time,event==1)~X4+X3+X6+X7", "Surv(time,event==2)~X4+X3+X6+X7"), method=c("","","","logreg","norm",""),m=2) ## cause 2 ccfit2=coxph(Surv(time,event==2)~X4+X3+X6+X7,data=dcr) mifit2=MIcombine(with(imputationList(fcr$impDatasets), coxph(Surv(time,event==2)~X4+X3+X6+X7))) publish(mifit2,fit=ccfit2,data=dcr) publish(ccfit2) } ## End(Not run)
## Not run: if (requireNamespace("riskRegression",quietly=TRUE) & requireNamespace("mitools",quietly=TRUE) & requireNamespace("smcfcs",quietly=TRUE)){ library(riskRegression) library(mitools) library(smcfcs) ## continuous outcome: linear regression # lava some data with missing values set.seed(7) d=sampleData(78) ## generate missing values d[X1==1,X6:=NA] d[X2==1,X3:=NA] d=d[,.(X8,X4,X3,X6,X7)] sapply(d,function(x)sum(is.na(x))) # multiple imputation (should set m to a large value) set.seed(17) f= smcfcs(d,smtype="lm", smformula=X8~X4+X3+X6+X7, method=c("","","logreg","norm",""),m=3) ccfit=lm(X8~X4+X3+X6+X7,data=d) mifit=MIcombine(with(imputationList(f$impDatasets), lm(X8~X4+X3+X6+X7))) publish(mifit,fit=ccfit,data=d) publish(ccfit) ## binary outcome # lava some data with missing values set.seed(7) db=sampleData(78,outcome="binary") ## generate missing values db[X1==1,X6:=NA] db[X2==1,X3:=NA] db=db[,.(Y,X4,X3,X6,X7)] sapply(db,function(x)sum(is.na(x))) # multiple imputation (should set m to a large value) set.seed(17) fb= smcfcs(db,smtype="logistic", smformula=Y~X4+X3+X6+X7, method=c("","","logreg","norm",""),m=2) ccfit=glm(Y~X4+X3+X6+X7,family="binomial",data=db) mifit=MIcombine(with(imputationList(fb$impDatasets), glm(Y~X4+X3+X6+X7,family="binomial"))) publish(mifit,fit=ccfit) publish(ccfit) ## survival: Cox regression library(survival) # lava some data with missing values set.seed(7) ds=sampleData(78,outcome="survival") ## generate missing values ds[X5==1,X6:=NA] ds[X2==1,X3:=NA] ds=ds[,.(time,event,X4,X3,X6,X7)] sapply(ds,function(x)sum(is.na(x))) set.seed(17) fs= smcfcs(ds,smtype="coxph", smformula="Surv(time,event)~X4+X3+X6+X7", method=c("","","","logreg","norm",""),m=2) ccfit=coxph(Surv(time,event)~X4+X3+X6+X7,data=ds) mifit=MIcombine(with(imputationList(fs$impDatasets), coxph(Surv(time,event)~X4+X3+X6+X7))) publish(mifit,fit=ccfit,data=ds) publish(ccfit) ## competing risks: Cause-specific Cox regression library(survival) # lava some data with missing values set.seed(7) dcr=sampleData(78,outcome="competing.risks") ## generate missing values dcr[X5==1,X6:=NA] dcr[X2==1,X3:=NA] dcr=dcr[,.(time,event,X4,X3,X6,X7)] sapply(dcr,function(x)sum(is.na(x))) set.seed(17) fcr= smcfcs(dcr,smtype="compet", smformula=c("Surv(time,event==1)~X4+X3+X6+X7", "Surv(time,event==2)~X4+X3+X6+X7"), method=c("","","","logreg","norm",""),m=2) ## cause 2 ccfit2=coxph(Surv(time,event==2)~X4+X3+X6+X7,data=dcr) mifit2=MIcombine(with(imputationList(fcr$impDatasets), coxph(Surv(time,event==2)~X4+X3+X6+X7))) publish(mifit2,fit=ccfit2,data=dcr) publish(ccfit2) } ## End(Not run)
Preparing a publishable table from riskRegression results
## S3 method for class 'riskRegression' publish(object, digits = c(2, 4), print = TRUE, ...)
## S3 method for class 'riskRegression' publish(object, digits = c(2, 4), print = TRUE, ...)
object |
object of class riskRegression as obtained with functions ARR and LRR. |
digits |
Number of digits for regression coefficients |
print |
If |
... |
passed to |
Table with regression coefficients, confidence intervals and p-values
Thomas A. Gerds <[email protected]>
ARR LRR
if (requireNamespace("riskRegression",quietly=TRUE)){ library(riskRegression) library(prodlim) library(lava) library(survival) set.seed(20) d <- SimCompRisk(20) f <- ARR(Hist(time,event)~X1+X2,data=d,cause=1) publish(f) publish(f,digits=c(1,3)) }
if (requireNamespace("riskRegression",quietly=TRUE)){ library(riskRegression) library(prodlim) library(lava) library(survival) set.seed(20) d <- SimCompRisk(20) f <- ARR(Hist(time,event)~X1+X2,data=d,cause=1) publish(f) publish(f,digits=c(1,3)) }
Write output of riskRegression::Score
in tables
## S3 method for class 'Score' publish(object, metrics, score = TRUE, contrasts = TRUE, level = 3, ...)
## S3 method for class 'Score' publish(object, metrics, score = TRUE, contrasts = TRUE, level = 3, ...)
object |
Object obtained with |
metrics |
Which metrics to put into tables. Defaults to
|
score |
Logical. If |
contrasts |
Logical. If |
level |
Level of subsection headers, i.e., ** for level 2 and *** for level 3 (useful for emacs org-users). Default is plain subsection headers no stars. A negative value will suppress subjection headers. |
... |
Passed to publish |
Collect prediction accuracy results in tables
Results of Score in tabular form
Thomas A. Gerds <[email protected]>
if (requireNamespace("riskRegression",quietly=TRUE)){ library(riskRegression) library(survival) learn = sampleData(100) val= sampleData(100) f1=CSC(Hist(time,event)~X1+X8,data=learn) f2=CSC(Hist(time,event)~X1+X5+X6+X8,learn) xs=Score(list(f1,f2),data=val,formula=Hist(time,event)~1) publish(xs) }
if (requireNamespace("riskRegression",quietly=TRUE)){ library(riskRegression) library(survival) learn = sampleData(100) val= sampleData(100) f1=CSC(Hist(time,event)~X1+X8,data=learn) f2=CSC(Hist(time,event)~X1+X5+X6+X8,learn) xs=Score(list(f1,f2),data=val,formula=Hist(time,event)~1) publish(xs) }
Format summary table of aov results
## S3 method for class 'summary.aov' publish( object, print = TRUE, handler = "sprintf", digits = c(2, 4), nsmall = digits, ... )
## S3 method for class 'summary.aov' publish( object, print = TRUE, handler = "sprintf", digits = c(2, 4), nsmall = digits, ... )
object |
glm object |
print |
Logical. Decide about whether or not to print the results. |
handler |
see |
digits |
see |
nsmall |
see |
... |
used to transport further arguments |
data(Diabetes) f <- glm(bp.1s~age+chol+gender+location,data=Diabetes) publish(summary(aov(f)),digits=c(1,2))
data(Diabetes) f <- glm(bp.1s~age+chol+gender+location,data=Diabetes) publish(summary(aov(f)),digits=c(1,2))
Alternative summary of survdiff results
## S3 method for class 'survdiff' publish(object, digits = c(2, 4), print = TRUE, ...)
## S3 method for class 'survdiff' publish(object, digits = c(2, 4), print = TRUE, ...)
object |
Object obtained with |
digits |
Vector with digits for rounding numbers: the second for pvalues, the first for all other numbers. |
print |
If |
... |
Not (yet) used. |
Thomas A. Gerds <[email protected]>
library(survival) data(pbc) sd <- survdiff(Surv(time,status!=0)~sex,data=pbc) publish(sd) publish(sd,digits=c(3,2))
library(survival) data(pbc) sd <- survdiff(Surv(time,status!=0)~sex,data=pbc) publish(sd) publish(sd,digits=c(3,2))
Tabulate the results of a regression analysis.
regressionTable( object, param.method = "coef", confint.method = c("default", "profile", "robust", "simultaneous"), pvalue.method = c("default", "robust", "simultaneous"), factor.reference = "extraline", intercept = 0L, units = NULL, noterms = NULL, probindex = 0L, ... )
regressionTable( object, param.method = "coef", confint.method = c("default", "profile", "robust", "simultaneous"), pvalue.method = c("default", "robust", "simultaneous"), factor.reference = "extraline", intercept = 0L, units = NULL, noterms = NULL, probindex = 0L, ... )
object |
Fitted regression model obtained with |
param.method |
Method to obtain model coefficients. |
confint.method |
Method to obtain confidence
intervals. Default is 'default' which leads to Wald
type intervals using the model based estimate of standard
error. 'profile' yields profile likelihood confidence
intervals, available from library MASS for |
pvalue.method |
Method to obtain p-values. If
|
factor.reference |
Style for showing results for categorical
variables. If |
intercept |
Logical. If |
units |
List of units for continuous variables. See examples. |
noterms |
Position of terms that should be ignored. E.g., for a Cox model with a cluster(id) term, there will be no hazard ratio for variable id. |
probindex |
Logical. If |
... |
Not yet used |
The basic use of this function is to generate a near publication worthy table from a regression object. As with summary(object) reference levels of factor variables are not included. Expansion of the table with such values can be performed using the "fixRegressionTable" function. Forest plot can be added to the output with "plotRegressionTable".
regressionTable produces an object (list) with the parameters deriveds. The summary function creates a data frame which can be used as a (near) publication ready table.
The table shows changes in mean for linear regression, odds ratios for logistic regression (family = binomial) and hazard ratios for Cox regression.
List of regression blocks
Thomas A. Gerds <[email protected]>
# linear regression data(Diabetes) f1 <- glm(bp.1s~age+gender+frame+chol,data=Diabetes) summary(regressionTable(f1)) summary(regressionTable(f1,units=list("chol"="mmol/L","age"="years"))) ## with interaction f2 <- glm(bp.1s~age*gender+frame+chol,data=Diabetes) summary(regressionTable(f2)) #Add reference values summary(regressionTable(f2)) f3 <- glm(bp.1s~age+gender*frame+chol,data=Diabetes) publish(f3) regressionTable(f3) # logistic regression Diabetes$hyp1 <- factor(1*(Diabetes$bp.1s>140)) l1 <- glm(hyp1~age+gender+frame+chol,data=Diabetes,family="binomial") regressionTable(l1) publish(l1) plot(regressionTable(l1)) ## with interaction l2 <- glm(hyp1~age+gender+frame*chol,data=Diabetes,family="binomial") regressionTable(l2) l3 <- glm(hyp1~age*gender+frame*chol,data=Diabetes,family="binomial") regressionTable(l3) # Cox regression library(survival) data(pbc) pbc$edema <- factor(pbc$edema,levels=c("0","0.5","1"),labels=c("0","0.5","1")) c1 <- coxph(Surv(time,status!=0)~log(bili)+age+protime+sex+edema,data=pbc) regressionTable(c1) # with interaction c2 <- coxph(Surv(time,status!=0)~log(bili)+age+protime*sex+edema,data=pbc) regressionTable(c2) c3 <- coxph(Surv(time,status!=0)~edema*log(bili)+age+protime+sex+edema+edema:sex,data=pbc) regressionTable(c3) if (requireNamespace("nlme",quietly=TRUE)){ ## gls regression library(lava) library(nlme) m <- lvm(Y ~ X1 + gender + group + Interaction) distribution(m, ~gender) <- binomial.lvm() distribution(m, ~group) <- binomial.lvm(size = 2) constrain(m, Interaction ~ gender + group) <- function(x){x[,1]*x[,2]} d <- sim(m, 1e2) d$gender <- factor(d$gender, labels = letters[1:2]) d$group <- factor(d$group) e.gls <- gls(Y ~ X1 + gender*group, data = d, weights = varIdent(form = ~1|group)) regressionTable(e.gls) summary(regressionTable(e.gls)) }
# linear regression data(Diabetes) f1 <- glm(bp.1s~age+gender+frame+chol,data=Diabetes) summary(regressionTable(f1)) summary(regressionTable(f1,units=list("chol"="mmol/L","age"="years"))) ## with interaction f2 <- glm(bp.1s~age*gender+frame+chol,data=Diabetes) summary(regressionTable(f2)) #Add reference values summary(regressionTable(f2)) f3 <- glm(bp.1s~age+gender*frame+chol,data=Diabetes) publish(f3) regressionTable(f3) # logistic regression Diabetes$hyp1 <- factor(1*(Diabetes$bp.1s>140)) l1 <- glm(hyp1~age+gender+frame+chol,data=Diabetes,family="binomial") regressionTable(l1) publish(l1) plot(regressionTable(l1)) ## with interaction l2 <- glm(hyp1~age+gender+frame*chol,data=Diabetes,family="binomial") regressionTable(l2) l3 <- glm(hyp1~age*gender+frame*chol,data=Diabetes,family="binomial") regressionTable(l3) # Cox regression library(survival) data(pbc) pbc$edema <- factor(pbc$edema,levels=c("0","0.5","1"),labels=c("0","0.5","1")) c1 <- coxph(Surv(time,status!=0)~log(bili)+age+protime+sex+edema,data=pbc) regressionTable(c1) # with interaction c2 <- coxph(Surv(time,status!=0)~log(bili)+age+protime*sex+edema,data=pbc) regressionTable(c2) c3 <- coxph(Surv(time,status!=0)~edema*log(bili)+age+protime+sex+edema+edema:sex,data=pbc) regressionTable(c3) if (requireNamespace("nlme",quietly=TRUE)){ ## gls regression library(lava) library(nlme) m <- lvm(Y ~ X1 + gender + group + Interaction) distribution(m, ~gender) <- binomial.lvm() distribution(m, ~group) <- binomial.lvm(size = 2) constrain(m, Interaction ~ gender + group) <- function(x){x[,1]*x[,2]} d <- sim(m, 1e2) d$gender <- factor(d$gender, labels = letters[1:2]) d$group <- factor(d$group) e.gls <- gls(Y ~ X1 + gender*group, data = d, weights = varIdent(form = ~1|group)) regressionTable(e.gls) summary(regressionTable(e.gls)) }
A study was made of all 26 astronauts on the first eight space shuttle flights (Bungo et.al., 1985). On a voluntary basis 17 astronauts consumed large quantities of salt and fluid prior to landing as a countermeasure to space deconditioning, while nine did not.
A data frame with 52 observations on the following 4 variables:
Factor with levels Post (after flight) and Pre (before flight)
Supine heart rate(beats per minute)
Countermeasure salt/fluid (1= yes, 0=no)
Person id
Altman, Practical statistics for medical research, Page 223, Ex. 9.1. Bungo et.al., 1985
data(SpaceT)
data(SpaceT)
A spaghettiogram is showing repeated measures (longitudinal data)
spaghettiogram( formula, data, xlim, ylim, xlab = "", ylab = "", axes = TRUE, col, lwd, lty, pch, legend = FALSE, add = FALSE, background = TRUE, ... )
spaghettiogram( formula, data, xlim, ylim, xlab = "", ylab = "", axes = TRUE, col, lwd, lty, pch, legend = FALSE, add = FALSE, background = TRUE, ... )
formula |
A formula which specifies the variables for the spaghettiograms. If Y ~ X + id(Z) then for each value of Z the spaghettiogram is the graph (X,Y) in the subset defined by the value of Z. Data are expected to be in the "long" format. Y is a numeric vector and X is a factor whose levels define the X-axis. Each level of the id-vector corresponds to one line (spaghetti) in the plot. |
data |
data set in which variables X, Y and Z are defined. |
xlim |
Limits for x-axis |
ylim |
Limits for y-axis |
xlab |
Label for x-axis |
ylab |
Label for x-axis |
axes |
Logical indicating if axes should be drawn. |
col |
Colors for the spaghettiograms |
lwd |
Widths for the spaghettiograms |
lty |
Type for the spaghettiograms |
pch |
Point-type for the spaghettiograms |
legend |
If |
add |
If |
background |
Control the background color of the graph. |
... |
used to transport arguments which are passed to the
following subroutines: |
List with data of each subject
data(SpaceT) Spaghettiogram(HR~Status+id(ID), data=SpaceT)
data(SpaceT) Spaghettiogram(HR~Status+id(ID), data=SpaceT)
Extract data and design matrix including specials from call
specialFrame( formula, data, unspecials.design = TRUE, specials, specials.factor = TRUE, specials.design = FALSE, strip.specials = TRUE, strip.arguments = NULL, strip.alias = NULL, strip.unspecials = NULL, drop.intercept = TRUE, response = TRUE, na.action = options()$na.action )
specialFrame( formula, data, unspecials.design = TRUE, specials, specials.factor = TRUE, specials.design = FALSE, strip.specials = TRUE, strip.arguments = NULL, strip.alias = NULL, strip.unspecials = NULL, drop.intercept = TRUE, response = TRUE, na.action = options()$na.action )
formula |
Formula whose left hand side specifies the event history, i.e., either via Surv() or Hist(). |
data |
Data frame in which the formula is interpreted |
unspecials.design |
Passed as is to
|
specials |
Character vector of special function names.
Usually the body of the special functions is function(x)x but
e.g., |
specials.factor |
Passed as is to |
specials.design |
Passed as is to |
strip.specials |
Passed as |
strip.arguments |
Passed as |
strip.alias |
Passed as |
strip.unspecials |
Passed as |
drop.intercept |
Passed as is to |
response |
If FALSE do not get response data. |
na.action |
Decide what to do with missing values. |
Obtain a list with the data used for event history regression analysis. This function cannot be used directly on the user level but inside a function to prepare data for survival analysis.
A list which contains
- the response
- the design matrix (see model.design
)
- one entry for each special (see model.design
)
Thomas A. Gerds <[email protected]>
model.frame model.design Hist
## Here are some data with an event time and no competing risks ## and two covariates X1 and X2. ## Suppose we want to declare that variable X1 is treated differently ## than variable X2. For example, X1 could be a cluster variable, or ## X1 should have a proportional effect on the outcome. d <- data.frame(y=1:7, X2=c(2.24,3.22,9.59,4.4,3.54,6.81,5.05), X3=c(1,1,1,1,0,0,1), X4=c(44.69,37.41,68.54,38.85,35.9,27.02,41.84), X1=factor(c("a","b","a","c","c","a","b"), levels=c("c","a","b"))) ## define special functions prop and cluster prop <- function(x)x cluster <- function(x)x ## We pass a formula and the data e <- specialFrame(y~prop(X1)+X2+cluster(X3)+X4, data=d, specials=c("prop","cluster")) ## The first element is the response e$response ## The other elements are the design, i.e., model.matrix for the non-special covariates e$design ## and a data.frame for the special covariates e$prop ## The special covariates can be returned as a model.matrix e2 <- specialFrame(y~prop(X1)+X2+cluster(X3)+X4, data=d, specials=c("prop","cluster"), specials.design=TRUE) e2$prop ## and the non-special covariates can be returned as a data.frame e3 <- specialFrame(y~prop(X1)+X2+cluster(X3)+X4, data=d, specials=c("prop","cluster"), specials.design=TRUE, unspecials.design=FALSE) e3$design
## Here are some data with an event time and no competing risks ## and two covariates X1 and X2. ## Suppose we want to declare that variable X1 is treated differently ## than variable X2. For example, X1 could be a cluster variable, or ## X1 should have a proportional effect on the outcome. d <- data.frame(y=1:7, X2=c(2.24,3.22,9.59,4.4,3.54,6.81,5.05), X3=c(1,1,1,1,0,0,1), X4=c(44.69,37.41,68.54,38.85,35.9,27.02,41.84), X1=factor(c("a","b","a","c","c","a","b"), levels=c("c","a","b"))) ## define special functions prop and cluster prop <- function(x)x cluster <- function(x)x ## We pass a formula and the data e <- specialFrame(y~prop(X1)+X2+cluster(X3)+X4, data=d, specials=c("prop","cluster")) ## The first element is the response e$response ## The other elements are the design, i.e., model.matrix for the non-special covariates e$design ## and a data.frame for the special covariates e$prop ## The special covariates can be returned as a model.matrix e2 <- specialFrame(y~prop(X1)+X2+cluster(X3)+X4, data=d, specials=c("prop","cluster"), specials.design=TRUE) e2$prop ## and the non-special covariates can be returned as a data.frame e3 <- specialFrame(y~prop(X1)+X2+cluster(X3)+X4, data=d, specials=c("prop","cluster"), specials.design=TRUE, unspecials.design=FALSE) e3$design
Plotting the prediction of a logistic regression model with confidence bands against one continuous variable.
splinePlot.lrm( object, xvar, xvalues, xlim = range(xvalues), ylim, xlab = xvar, ylab = scale[[1]], col = 1, lty = 1, lwd = 3, confint = TRUE, newdata = NULL, scale = c("risk", "odds"), add = FALSE, ... )
splinePlot.lrm( object, xvar, xvalues, xlim = range(xvalues), ylim, xlab = xvar, ylab = scale[[1]], col = 1, lty = 1, lwd = 3, confint = TRUE, newdata = NULL, scale = c("risk", "odds"), add = FALSE, ... )
object |
Logistic regression model fitted with |
xvar |
Name of the variable to show on x-axis |
xvalues |
Sequence of |
xlim |
x-axis limits |
ylim |
y-axis limits |
xlab |
x-axis labels |
ylab |
y-axis labels |
col |
color of the line |
lty |
line style |
lwd |
line width |
confint |
Logical. If |
newdata |
How to adjust |
scale |
Character string that determines the outcome scale (y-axis). Choose between |
add |
Logical. If |
... |
Further arguments passed to |
Function which extracts from a logistic regression model
fitted with rms::lrm
the predicted risks or odds.
Thomas A. Gerds <[email protected]>
data(Diabetes) Diabetes$hypertension= 1*(Diabetes$bp.1s>140) library(rms) uu <- datadist(Diabetes) options(datadist="uu") fit=lrm(hypertension~rcs(age)+gender+hdl,data=Diabetes) splinePlot.lrm(fit,xvar="age",xvalues=seq(30,50,1))
data(Diabetes) Diabetes$hypertension= 1*(Diabetes$bp.1s>140) library(rms) uu <- datadist(Diabetes) options(datadist="uu") fit=lrm(hypertension~rcs(age)+gender+hdl,data=Diabetes) splinePlot.lrm(fit,xvar="age",xvalues=seq(30,50,1))
Some users like background colors, and it may be helpful to have grid lines
to read off e.g. probabilities from a Kaplan-Meier graph. Both things can be
controlled with this function. However, it mainly serves
plot.prodlim
.
stripes( xlim, ylim, col = "white", lwd = 1, gridcol = "gray77", fill = "white", horizontal = NULL, vertical = NULL, border = "black", xpd = FALSE )
stripes( xlim, ylim, col = "white", lwd = 1, gridcol = "gray77", fill = "white", horizontal = NULL, vertical = NULL, border = "black", xpd = FALSE )
xlim |
Limits for the horizontal x-dimension. Defaults to par("usr")[1:2]. |
ylim |
Limits for the vertical y-dimension. |
col |
Colors use for the stripes. Can be a vector of colors which are then repeated appropriately. |
lwd |
Line width |
gridcol |
Color of grid lines |
fill |
Color to fill the background rectangle given by par("usr"). |
horizontal |
Numerical values at which to show horizontal grid lines, and at which to change the color of the stripes. |
vertical |
Numerical values at which to show vertical grid lines. |
border |
If a fill color is provided, the color of the border around the background. |
xpd |
From |
Thomas Alexander Gerds <[email protected]>
plot(0,0) backGround(bg="beige",fg="red",vertical=0,horizontal=0) plot(0,0) stripes(col=c("yellow","green"),gridcol="red",xlim=c(-1,1),horizontal=seq(0,1,.1)) stripes(col=c("yellow","green"),gridcol="red",horizontal=seq(0,1,.1))
plot(0,0) backGround(bg="beige",fg="red",vertical=0,horizontal=0) plot(0,0) stripes(col=c("yellow","green"),gridcol="red",xlim=c(-1,1),horizontal=seq(0,1,.1)) stripes(col=c("yellow","green"),gridcol="red",horizontal=seq(0,1,.1))
The function can examine Cox regression, logistic regression and Poisson regression (Poisson regression for survival analysis) where the effect of one variable is of particular interest. This function systematically checks for effect modification with a list of other variables.
In randomised studies the main regression analysis is often univariate and includes only the exposure of interest. In observational studies the main regression analysis can readily be adjusted for other variables including those which may modify the effect of the variable of interest.
subgroupAnalysis( object, data, treatment, subgroups, confint.method = "default", factor.reference = "extraline", ... )
subgroupAnalysis( object, data, treatment, subgroups, confint.method = "default", factor.reference = "extraline", ... )
object |
- glm, coxph or cph object for which subgroups should be analyzed. |
data |
- Dataset including all relevant variables |
treatment |
- Must be numeric - 0/1 |
subgroups |
- A vector of variable names presenting the factor variables where subgroups should be formed. These variables should all be "factors" |
confint.method |
"default" creates Wald type confidence interval, "robust", creates creates robust standard errors - see regressionTable function. |
factor.reference |
"extraline" creates an extraline for the reference, "inline" avoids this line. |
... |
additional arguments such as case weights, which are passed on to |
The function can only handle a bivariate treatment, which MUST coded as zero or one. The p-value for interaction is obtained with a likelihood ratio test comparing the main regression analysis with the interaction model.
There are plot and print functions available for the function see helppages for plot.subgroupAnalysis and print.subgroupAnalysis
A data.frame with subsgroup specifications, number in each subgroup, parameter estimates and p-value for interaction. A forest plot can be obtained with "plotConfidence".
Christian Torp-Pedersen
coxph, glm, plotConfidence
#load libraries library(data.table) library(Publish) library(survival) data(traceR) #get dataframe traceR data.table::setDT(traceR) traceR[,':='(wmi2=factor(wallMotionIndex<0.9,levels=c(TRUE,FALSE), labels=c("bad","good")), abd2=factor(abdominalCircumference<95, levels=c(TRUE,FALSE), labels=c("slim","fat")))] traceR[,sex:=as.factor(sex)] # all subgroup variables needs to be factor traceR[observationTime==0,observationTime:=1] # remove missing covariate values traceR=na.omit(traceR) # univariate analysis of smoking in subgroups of age and sex # Main regression analysis is a simple/univariate Cox regression fit_cox <- coxph(Surv(observationTime,dead)~treatment,data=traceR) sub_cox <- subgroupAnalysis(fit_cox,traceR,treatment="treatment", subgroups=c("smoking","sex","wmi2","abd2")) sub_cox # to see how the results are obtained consider the variable: smoking fit_cox_smoke <- coxph(Surv(observationTime,dead)~treatment*smoking,data=traceR) # the last three rows of the following output: publish(fit_cox_smoke) # are included in the first 3 rows of the result of the sub group analysis: sub_cox[1:3,] # the p-value is obtained as: fit_cox_smoke_add <- coxph(Surv(observationTime,dead)~treatment+smoking,data=traceR) anova(fit_cox_smoke_add,fit_cox_smoke,test="Chisq") # Note that a real subgroup analysis would be to subset the data fit_cox1a <- coxph(Surv(observationTime,dead)~treatment,data=traceR[smoking=="never"]) fit_cox1b <- coxph(Surv(observationTime,dead)~treatment,data=traceR[smoking=="current"]) fit_cox1c <- coxph(Surv(observationTime,dead)~treatment,data=traceR[smoking=="prior"]) ## when the main analysis is already adjusted fit_cox_adj <- coxph(Surv(observationTime,dead)~treatment+smoking+sex+wmi2+abd2, data=traceR) sub_cox_adj <- subgroupAnalysis(fit_cox_adj,traceR,treatment="treatment", subgroups=c("smoking","sex","wmi2","abd2")) # subgroups as character string sub_cox_adj # When both start and end are in the Surv statement: traceR[,null:=0] fit_cox2 <- coxph(Surv(null,observationTime,dead)~treatment+smoking+sex+wmi2+abd2,data=traceR) summary(regressionTable(fit_cox)) sub_cox2 <- subgroupAnalysis(fit_cox2,traceR,treatment="treatment", subgroups=c("smoking","sex","wmi2","abd2")) # Analysis with Poisson - and the unrealistic assumption of constant hazard # and adjusted for age in all subgroups fit_p <- glm(dead~treatment+age+offset(log(observationTime)),family="poisson", data=traceR) sub_pois <- subgroupAnalysis(fit_p,traceR,treatment="treatment", subgroups=~smoking+sex+wmi2+abd2) # Analysis with logistic regression - and very wrongly ignoring censoring fit_log <- glm(dead~treatment+age,family="binomial",data=traceR) sub_log <- subgroupAnalysis(fit_log,traceR,treatment="treatment", subgroups=~smoking+sex+wmi2+abd2, factor.reference="inline")
#load libraries library(data.table) library(Publish) library(survival) data(traceR) #get dataframe traceR data.table::setDT(traceR) traceR[,':='(wmi2=factor(wallMotionIndex<0.9,levels=c(TRUE,FALSE), labels=c("bad","good")), abd2=factor(abdominalCircumference<95, levels=c(TRUE,FALSE), labels=c("slim","fat")))] traceR[,sex:=as.factor(sex)] # all subgroup variables needs to be factor traceR[observationTime==0,observationTime:=1] # remove missing covariate values traceR=na.omit(traceR) # univariate analysis of smoking in subgroups of age and sex # Main regression analysis is a simple/univariate Cox regression fit_cox <- coxph(Surv(observationTime,dead)~treatment,data=traceR) sub_cox <- subgroupAnalysis(fit_cox,traceR,treatment="treatment", subgroups=c("smoking","sex","wmi2","abd2")) sub_cox # to see how the results are obtained consider the variable: smoking fit_cox_smoke <- coxph(Surv(observationTime,dead)~treatment*smoking,data=traceR) # the last three rows of the following output: publish(fit_cox_smoke) # are included in the first 3 rows of the result of the sub group analysis: sub_cox[1:3,] # the p-value is obtained as: fit_cox_smoke_add <- coxph(Surv(observationTime,dead)~treatment+smoking,data=traceR) anova(fit_cox_smoke_add,fit_cox_smoke,test="Chisq") # Note that a real subgroup analysis would be to subset the data fit_cox1a <- coxph(Surv(observationTime,dead)~treatment,data=traceR[smoking=="never"]) fit_cox1b <- coxph(Surv(observationTime,dead)~treatment,data=traceR[smoking=="current"]) fit_cox1c <- coxph(Surv(observationTime,dead)~treatment,data=traceR[smoking=="prior"]) ## when the main analysis is already adjusted fit_cox_adj <- coxph(Surv(observationTime,dead)~treatment+smoking+sex+wmi2+abd2, data=traceR) sub_cox_adj <- subgroupAnalysis(fit_cox_adj,traceR,treatment="treatment", subgroups=c("smoking","sex","wmi2","abd2")) # subgroups as character string sub_cox_adj # When both start and end are in the Surv statement: traceR[,null:=0] fit_cox2 <- coxph(Surv(null,observationTime,dead)~treatment+smoking+sex+wmi2+abd2,data=traceR) summary(regressionTable(fit_cox)) sub_cox2 <- subgroupAnalysis(fit_cox2,traceR,treatment="treatment", subgroups=c("smoking","sex","wmi2","abd2")) # Analysis with Poisson - and the unrealistic assumption of constant hazard # and adjusted for age in all subgroups fit_p <- glm(dead~treatment+age+offset(log(observationTime)),family="poisson", data=traceR) sub_pois <- subgroupAnalysis(fit_p,traceR,treatment="treatment", subgroups=~smoking+sex+wmi2+abd2) # Analysis with logistic regression - and very wrongly ignoring censoring fit_log <- glm(dead~treatment+age,family="binomial",data=traceR) sub_log <- subgroupAnalysis(fit_log,traceR,treatment="treatment", subgroups=~smoking+sex+wmi2+abd2, factor.reference="inline")
Summarize confidence intervals
## S3 method for class 'ci' summary(object, format = "[u;l]", se = FALSE, print = TRUE, ...)
## S3 method for class 'ci' summary(object, format = "[u;l]", se = FALSE, print = TRUE, ...)
object |
Object of class ci containing point estimates and the corresponding confidence intervals |
format |
A string which indicates the format used for
confidence intervals. The string is passed to
|
se |
If |
print |
Logical: if |
... |
used to control formatting of numbers |
This format of the confidence intervals is user-manipulable.
Formatted confidence intervals
Thomas A. Gerds <[email protected]>
ci plot.ci format.ci
library(lava) m <- lvm(Y~X) m <- categorical(m,Y~X,K=4) set.seed(4) d <- sim(m,24) ci.mean(Y~X,data=d) x <- summary(ci.mean(Y~X,data=d),digits=2) x x <- summary(ci.mean(Y~X,data=d),format="(u,l)",digits=2) x <- summary(ci.mean(Y~X,data=d),format="(u,l)",digits=1,se=TRUE) x <- summary(ci.mean(Y~X,data=d),format="(u,l)",digits=1,handler="format") x <- summary(ci.mean(Y~X,data=d),format="(u,l)",digits=1,handler="prettyNum")
library(lava) m <- lvm(Y~X) m <- categorical(m,Y~X,K=4) set.seed(4) d <- sim(m,24) ci.mean(Y~X,data=d) x <- summary(ci.mean(Y~X,data=d),digits=2) x x <- summary(ci.mean(Y~X,data=d),format="(u,l)",digits=2) x <- summary(ci.mean(Y~X,data=d),format="(u,l)",digits=1,se=TRUE) x <- summary(ci.mean(Y~X,data=d),format="(u,l)",digits=1,handler="format") x <- summary(ci.mean(Y~X,data=d),format="(u,l)",digits=1,handler="prettyNum")
Preparing regression results for publication
## S3 method for class 'regressionTable' summary(object, show.missing = "ifany", print = TRUE, ...)
## S3 method for class 'regressionTable' summary(object, show.missing = "ifany", print = TRUE, ...)
object |
object obtained with |
show.missing |
Decide if number of missing values are shown.
Either logical or character. If |
print |
If |
... |
Used to control formatting of parameter estimates, confidence intervals and p-values. See examples. |
List with two elements:
regressionTable: the formatted regression table (a data.frame)
rawTable: table with the unformatted values (a data.frame)
Thomas A. Gerds <[email protected]>
publish.glm publish.coxph
library(survival) data(pbc) pbc$edema <- factor(pbc$edema,levels=c("0","0.5","1"),labels=c("0","0.5","1")) fit = coxph(Surv(time,status!=0)~age+sex+edema+log(bili)+log(albumin)+log(protime), data=pbc) u=summary(regressionTable(fit)) u$regressionTable u$rawTable summary(regressionTable(fit),handler="prettyNum") summary(regressionTable(fit),handler="format") summary(regressionTable(fit),handler="sprintf",digits=c(2,2),pValue.stars=TRUE) summary(regressionTable(fit),handler="sprintf",digits=c(2,2),pValue.stars=TRUE,ci.format="(l,u)")
library(survival) data(pbc) pbc$edema <- factor(pbc$edema,levels=c("0","0.5","1"),labels=c("0","0.5","1")) fit = coxph(Surv(time,status!=0)~age+sex+edema+log(bili)+log(albumin)+log(protime), data=pbc) u=summary(regressionTable(fit)) u$regressionTable u$rawTable summary(regressionTable(fit),handler="prettyNum") summary(regressionTable(fit),handler="format") summary(regressionTable(fit),handler="sprintf",digits=c(2,2),pValue.stars=TRUE) summary(regressionTable(fit),handler="sprintf",digits=c(2,2),pValue.stars=TRUE,ci.format="(l,u)")
This function operates on a "subgroupAnalysis" object to produce a formatted table.
## S3 method for class 'subgroupAnalysis' summary( object, digits = 3, eps = 0.001, subgroup.p = FALSE, keep.digital = FALSE, ... )
## S3 method for class 'subgroupAnalysis' summary( object, digits = 3, eps = 0.001, subgroup.p = FALSE, keep.digital = FALSE, ... )
object |
- a subgroupAnalysis object |
digits |
- number of digits for risk ratios |
eps |
- lowest value of p to be shown exactly, others will be "<eps" |
subgroup.p |
- present p-values for analyses in subgroups |
keep.digital |
- prevents formatting risk ratio and confidence limits. Useful for cases when further manipulations of rows and columns prior to adding a forest plot is relevant. |
... |
- not currently used |
This function produces a formatted or unformatted table of a subgroupAnalysis object. A forest plot can be added with the plot function.
A data.frame with formatted values for subgroups
Christian Torp-Pedersen
subgroupAnalysis
#load libraries library(Publish) library(survival) library(data.table) data(traceR) #get dataframe traceR setDT(traceR) traceR[,':='(wmi2=factor(wallMotionIndex<0.9,levels=c(TRUE,FALSE), labels=c("bad","good")), abd2=factor(abdominalCircumference<95, levels=c(TRUE,FALSE), labels=c("slim","fat")))] traceR[,sex:=as.factor(sex)] # all subgroup variables needs to be factor traceR[observationTime==0,observationTime:=1] # univariate analysis of smoking in subgroups of age and sex # Basic model from randomised study - but observed for 12 years fit_cox <- coxph(Surv(observationTime,dead)~treatment,data=traceR) sub_cox <- subgroupAnalysis(fit_cox,traceR,treatment="treatment", subgroup=c("smoking","sex","wmi2","abd2")) # subgroups as character string summary(sub_cox)
#load libraries library(Publish) library(survival) library(data.table) data(traceR) #get dataframe traceR setDT(traceR) traceR[,':='(wmi2=factor(wallMotionIndex<0.9,levels=c(TRUE,FALSE), labels=c("bad","good")), abd2=factor(abdominalCircumference<95, levels=c(TRUE,FALSE), labels=c("slim","fat")))] traceR[,sex:=as.factor(sex)] # all subgroup variables needs to be factor traceR[observationTime==0,observationTime:=1] # univariate analysis of smoking in subgroups of age and sex # Basic model from randomised study - but observed for 12 years fit_cox <- coxph(Surv(observationTime,dead)~treatment,data=traceR) sub_cox <- subgroupAnalysis(fit_cox,traceR,treatment="treatment", subgroup=c("smoking","sex","wmi2","abd2")) # subgroups as character string summary(sub_cox)
Summary function for univariate table
## S3 method for class 'univariateTable' summary( object, n = "inNames", drop.reference = FALSE, pvalue.stars = FALSE, pvalue.digits = 4, show.missing = c("ifany", "always", "never"), show.pvalues, show.totals, ... )
## S3 method for class 'univariateTable' summary( object, n = "inNames", drop.reference = FALSE, pvalue.stars = FALSE, pvalue.digits = 4, show.missing = c("ifany", "always", "never"), show.pvalues, show.totals, ... )
object |
|
n |
If not missing, show the number of subjects in each
column. If equal to |
drop.reference |
Logical or character (vector). Decide if line with reference
level should be suppressed for factors. If |
pvalue.stars |
If TRUE use |
pvalue.digits |
Passed to |
show.missing |
Decides if number of missing values are shown in table.
Defaults to |
show.pvalues |
Logical. If set to |
show.totals |
Logical. If set to |
... |
passed on to |
Collects results of univariate table in a matrix.
Summary table
Thomas A. Gerds <[email protected]>
data(Diabetes) u <- univariateTable(gender~age+location+Q(BMI)+height+weight, data=Diabetes) summary(u) summary(u,n=NULL) summary(u,pvalue.digits=2,"age"="Age (years)","height"="Body height (cm)") u2 <- univariateTable(location~age+AgeGroups+gender+height+weight, data=Diabetes) summary(u2) summary(u2,drop.reference=TRUE) ## same but more flexible summary(u2,drop.reference=c("binary")) ## same but even more flexible summary(u2,drop.reference=c("gender"))
data(Diabetes) u <- univariateTable(gender~age+location+Q(BMI)+height+weight, data=Diabetes) summary(u) summary(u,n=NULL) summary(u,pvalue.digits=2,"age"="Age (years)","height"="Body height (cm)") u2 <- univariateTable(location~age+AgeGroups+gender+height+weight, data=Diabetes) summary(u2) summary(u2,drop.reference=TRUE) ## same but more flexible summary(u2,drop.reference=c("binary")) ## same but even more flexible summary(u2,drop.reference=c("gender"))
First apply univariateTable then call summary.
sutable(...)
sutable(...)
... |
Unnamed arguments and are passed to |
Summary table
Thomas A. Gerds <[email protected]>
summary.univariateTable univariateTable
data(Diabetes) sutable(gender~age+location+Q(BMI)+height+weight,data=Diabetes,BMI="Body mass index (kg/m^2)")
data(Diabetes) sutable(gender~age+location+Q(BMI)+height+weight,data=Diabetes,BMI="Body mass index (kg/m^2)")
2x2 table calculus for teaching
table2x2( x, digits = 1, conf.level = 0.95, stats = c("table", "rd", "rr", "or", "chisq", "fisher") )
table2x2( x, digits = 1, conf.level = 0.95, stats = c("table", "rd", "rr", "or", "chisq", "fisher") )
x |
2x2 table |
digits |
rounding digits |
conf.level |
Confidence level used for constructing confidence intervals. Default is 0.95. |
stats |
subset or all of |
2x2 table calculus for teaching
see example
Thomas A. Gerds <[email protected]>
table2x2(table("marker"=rbinom(100,1,0.4),"response"=rbinom(100,1,0.1))) table2x2(matrix(c(71,18,38,8),ncol=2),stats="table") table2x2(matrix(c(71,18,38,8),ncol=2),stats=c("rr","fisher"))
table2x2(table("marker"=rbinom(100,1,0.4),"response"=rbinom(100,1,0.1))) table2x2(matrix(c(71,18,38,8),ncol=2),stats="table") table2x2(matrix(c(71,18,38,8),ncol=2),stats=c("rr","fisher"))
These data are from screening to the TRACE study, a comparison between the angiotensin converting enzyme inhibitor trandolapril and placebo ford large myocardial infarctions. A total of 6676 patients were screened for the study. Survival has been followed for the screened population for 16 years. The current data has been prepared for a poisson regression to examine survival. The data has been "split" in 0.5 year intervals (plitLexis function from Epi package) and then collapsed on all variables (aggregate function).
A data frame with 1832 observations on the following 6 variables.
Time after myocardial infarction, in 6 months intervals
Smoking status. A factor with levels (Never, Current, Previous)
A factor with levels (Female, Male)
Age in years at the time of myocardial infarction
Cumulative risk time in each split
Count of deaths
Kober et al 1995 Am. J. Cardiol 76,1-5
data(trace) Units(trace,list("age"="years")) fit <- glm(dead ~ smoking+sex+age+Time+offset(log(ObsTime)), family="poisson",data=trace) rtf <- regressionTable(fit,factor.reference = "inline") summary(rtf) publish(fit)
data(trace) Units(trace,list("age"="years")) fit <- glm(dead ~ smoking+sex+age+Time+offset(log(ObsTime)), family="poisson",data=trace) rtf <- regressionTable(fit,factor.reference = "inline") summary(rtf) publish(fit)
These data are from the TRACE randomised trial, a comparison between the angiotensin converting enzyme inhibitor trandolapril and placebo ford large myocardial infarctions. In all, 1749 patients were randomised. The current data are from a 15 year follow-up.
A data frame with 1749 observations on the following variables.
Weight in kilo
Height in meters
in centimeters
in mmol per liter
left ventricular function 0-2, 0 worst, 2 normal
time to death or censor
age in years
0=female,1=male
0=never,1=prior,2=current
0=censor,1=dead
placebo or trandolapril
Kober et al 1995 NEJM 333,1670
data(trace) Units(trace,list("age"="years")) fit <- glm(dead ~ smoking+sex+age+Time+offset(log(ObsTime)), family="poisson",data=trace) rtf <- regressionTable(fit,factor.reference = "inline") summary(rtf) publish(fit)
data(trace) Units(trace,list("age"="years")) fit <- glm(dead ~ smoking+sex+age+Time+offset(log(ObsTime)), family="poisson",data=trace) rtf <- regressionTable(fit,factor.reference = "inline") summary(rtf) publish(fit)
Add variable units to data.frame (or data.table).
Units(object, units)
Units(object, units)
object |
A data.frame or data.table |
units |
Named list of units. Names are variable names. If omitted, show existing units. |
If the object has units existing units are replaced by given units.
The object augmented with attribute "units"
Thomas A. Gerds <[email protected]>
data(Diabetes) Diabetes <- Units(Diabetes,list(BMI="kg/m^2")) Units(Diabetes) Diabetes <- Units(Diabetes,list(bp.1s="mm Hg",bp.2s="mm Hg")) Units(Diabetes)
data(Diabetes) Diabetes <- Units(Diabetes,list(BMI="kg/m^2")) Units(Diabetes) Diabetes <- Units(Diabetes,list(bp.1s="mm Hg",bp.2s="mm Hg")) Units(Diabetes)
Categorical variables are summarized using counts and frequencies and compared .
univariateTable( formula, data = parent.frame(), summary.format = "mean(x) (sd(x))", Q.format = "median(x) [iqr(x)]", freq.format = "count(x) (percent(x))", column.percent = TRUE, digits = c(1, 1, 3), big.mark = ",", short.groupnames, compare.groups = TRUE, show.totals = TRUE, n = "inNames", outcome = NULL, ... )
univariateTable( formula, data = parent.frame(), summary.format = "mean(x) (sd(x))", Q.format = "median(x) [iqr(x)]", freq.format = "count(x) (percent(x))", column.percent = TRUE, digits = c(1, 1, 3), big.mark = ",", short.groupnames, compare.groups = TRUE, show.totals = TRUE, n = "inNames", outcome = NULL, ... )
formula |
Formula specifying the grouping variable (strata) on the left hand side (can be omitted) and on the right hand side the variables for which to obtain (descriptive) statistics. |
data |
Data set in which formula is evaluated |
summary.format |
Format for the numeric (non-factor) variables. Default is mean (SD). If different formats are desired, either special Q can be used or the function is called multiple times and the results are rbinded. See examples. |
Q.format |
Format for quantile summary of numerical variables: Default is median (inter quartile range). |
freq.format |
Format for categorical variables. Default is count (percentage). |
column.percent |
Logical, if |
digits |
Number of digits |
big.mark |
For formatting large numbers (i.e., greater than 1,000). |
short.groupnames |
If |
compare.groups |
Method used to compare groups. If
|
show.totals |
If |
n |
If |
outcome |
Outcome data used to calculate p-values when
compare groups method is |
... |
saved as part of the result to be passed on to
|
This function can generate the baseline demographic characteristics that forms table 1 in many publications. It is also useful for generating other tables of univariate statistics.
The result of the function is an object (list) which containe the various data
generated. In most applications the summary
function should be applied which generates
a data.frame with a (nearly) publication ready table. Standard manipulation can be
used to modify, add or remove columns/rows and for users not accustomed to R the table
generated can be exported to a text file which can be read by other software, e.g., via
write.csv(table,file="path/to/results/table.csv")
By default, continuous variables are summarized by means and standard deviations and compared with t-tests. When continuous variables are summarized by medians and interquartile ranges the Deviations from the above defaults are obtained when the arguments summary.format and freq.format are combined with suitable summary functions.
List with one summary table element for each variable on the right hand side of formula.
The summary tables can be combined with rbind
. The function summary.univariateTable
combines the tables, and shows p-values in custom format.
Thomas A. Gerds
summary.univariateTable, publish.univariateTable
data(Diabetes) library(data.table) univariateTable(~age,data=Diabetes) univariateTable(~gender,data=Diabetes) univariateTable(~age+gender+ height+weight,data=Diabetes) ## same thing but less typing utable(~age+gender+ height+weight,data=Diabetes) ## summary by location: univariateTable(location~Q(age)+gender+height+weight,data=Diabetes) ## continuous variables marked with Q() are (by default) summarized ## with median (IQR) and kruskal.test (with two groups equivalent to wilcox.test) ## variables not marked with Q() are (by default) summarized ## with mean (sd) and anova.glm(...,test="Chisq") ## the p-value of anova(glm()) with only two groups is similar ## but not exactly equal to that of a t.test ## categorical variables are (by default) summarized by count ## (percent) and chi-square tests (\code{chisq.test}). When \code{compare.groups ='logistic'} ## anova(glm(...,family=binomial,test="Chisq")) is used to calculate p-values. ## export result to csv table1 = summary(univariateTable(location~age+gender+height+weight,data=Diabetes), show.pvalues=FALSE) # write.csv(table1,file="~/table1.csv",rownames=FALSE) ## change labels and values utable(location~age+gender+height+weight,data=Diabetes, age="Age (years)",gender="Sex", gender.female="Female", gender.male="Male", height="Body height (inches)", weight="Body weight (pounds)") ## Use quantiles and rank tests for some variables and mean and standard deviation for others univariateTable(gender~Q(age)+location+Q(BMI)+height+weight, data=Diabetes) ## Factor with more than 2 levels Diabetes$AgeGroups <- cut(Diabetes$age, c(19,29,39,49,59,69,92), include.lowest=TRUE) univariateTable(location~AgeGroups+gender+height+weight, data=Diabetes) ## Row percent univariateTable(location~gender+age+AgeGroups, data=Diabetes, column.percent=FALSE) ## change of frequency format univariateTable(location~gender+age+AgeGroups, data=Diabetes, column.percent=FALSE, freq.format="percent(x) (n=count(x))") ## changing Labels u <- univariateTable(location~gender+AgeGroups+ height + weight, data=Diabetes, column.percent=TRUE, freq.format="count(x) (percent(x))") summary(u,"AgeGroups"="Age (years)","height"="Height (inches)") ## more than two groups Diabetes$frame=factor(Diabetes$frame,levels=c("small","medium","large")) univariateTable(frame~gender+BMI+age,data=Diabetes) Diabetes$sex=as.numeric(Diabetes$gender) univariateTable(frame~sex+gender+BMI+age, data=Diabetes,freq.format="count(x) (percent(x))") ## multiple summary formats ## suppose we want for some reason mean (range) for age ## and median (range) for BMI. ## method 1: univariateTable(frame~Q(age)+BMI, data=Diabetes, Q.format="mean(x) (range(x))", summary.format="median(x) (range(x))") ## method 2: u1 <- summary(univariateTable(frame~age, data=na.omit(Diabetes), summary.format="mean(x) (range(x))")) u2 <- summary(univariateTable(frame~BMI, data=na.omit(Diabetes), summary.format="median(x) (range(x))")) publish(rbind(u1,u2),digits=2) ## Large number format (big.mark) n=100000 dat=data.frame(id=1:n,z=rbinom(n,1,.3),x=factor(sample(1:8,size=n,replace=TRUE))) u3 <- summary(univariateTable(z~x, data=dat,big.mark=",")) u3
data(Diabetes) library(data.table) univariateTable(~age,data=Diabetes) univariateTable(~gender,data=Diabetes) univariateTable(~age+gender+ height+weight,data=Diabetes) ## same thing but less typing utable(~age+gender+ height+weight,data=Diabetes) ## summary by location: univariateTable(location~Q(age)+gender+height+weight,data=Diabetes) ## continuous variables marked with Q() are (by default) summarized ## with median (IQR) and kruskal.test (with two groups equivalent to wilcox.test) ## variables not marked with Q() are (by default) summarized ## with mean (sd) and anova.glm(...,test="Chisq") ## the p-value of anova(glm()) with only two groups is similar ## but not exactly equal to that of a t.test ## categorical variables are (by default) summarized by count ## (percent) and chi-square tests (\code{chisq.test}). When \code{compare.groups ='logistic'} ## anova(glm(...,family=binomial,test="Chisq")) is used to calculate p-values. ## export result to csv table1 = summary(univariateTable(location~age+gender+height+weight,data=Diabetes), show.pvalues=FALSE) # write.csv(table1,file="~/table1.csv",rownames=FALSE) ## change labels and values utable(location~age+gender+height+weight,data=Diabetes, age="Age (years)",gender="Sex", gender.female="Female", gender.male="Male", height="Body height (inches)", weight="Body weight (pounds)") ## Use quantiles and rank tests for some variables and mean and standard deviation for others univariateTable(gender~Q(age)+location+Q(BMI)+height+weight, data=Diabetes) ## Factor with more than 2 levels Diabetes$AgeGroups <- cut(Diabetes$age, c(19,29,39,49,59,69,92), include.lowest=TRUE) univariateTable(location~AgeGroups+gender+height+weight, data=Diabetes) ## Row percent univariateTable(location~gender+age+AgeGroups, data=Diabetes, column.percent=FALSE) ## change of frequency format univariateTable(location~gender+age+AgeGroups, data=Diabetes, column.percent=FALSE, freq.format="percent(x) (n=count(x))") ## changing Labels u <- univariateTable(location~gender+AgeGroups+ height + weight, data=Diabetes, column.percent=TRUE, freq.format="count(x) (percent(x))") summary(u,"AgeGroups"="Age (years)","height"="Height (inches)") ## more than two groups Diabetes$frame=factor(Diabetes$frame,levels=c("small","medium","large")) univariateTable(frame~gender+BMI+age,data=Diabetes) Diabetes$sex=as.numeric(Diabetes$gender) univariateTable(frame~sex+gender+BMI+age, data=Diabetes,freq.format="count(x) (percent(x))") ## multiple summary formats ## suppose we want for some reason mean (range) for age ## and median (range) for BMI. ## method 1: univariateTable(frame~Q(age)+BMI, data=Diabetes, Q.format="mean(x) (range(x))", summary.format="median(x) (range(x))") ## method 2: u1 <- summary(univariateTable(frame~age, data=na.omit(Diabetes), summary.format="mean(x) (range(x))")) u2 <- summary(univariateTable(frame~BMI, data=na.omit(Diabetes), summary.format="median(x) (range(x))")) publish(rbind(u1,u2),digits=2) ## Large number format (big.mark) n=100000 dat=data.frame(id=1:n,z=rbinom(n,1,.3),x=factor(sample(1:8,size=n,replace=TRUE))) u3 <- summary(univariateTable(z~x, data=dat,big.mark=",")) u3