---
title: "Approximating an arbitrary hazard function"
author: "Keaven Anderson"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Approximating an arbitrary hazard function}
  %\VignetteEngine{knitr::rmarkdown}
---

```{r, include=FALSE}
# Later chunks are evaluated only when the optional packages are installed.
run <- requireNamespace("dplyr", quietly = TRUE) &&
  requireNamespace("ggplot2", quietly = TRUE)

# Chunk defaults for the whole vignette: collapsed output, "#>" prefix,
# a common figure size, and conditional evaluation.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.height = 4,
  fig.width = 7.5,
  out.width = "100%",
  eval = run
)
```

```{r, message=FALSE, warning=FALSE}
library(simtrial)
library(ggplot2)
library(dplyr)
library(survival)
```

This vignette uses the bshazard package.
If it is not available from CRAN, you can install it from the CRAN mirror on GitHub with:

```{r, eval=FALSE}
# Install bshazard from the "cran/bshazard" repository on GitHub.
remotes::install_github("cran/bshazard")
```

We simulate from a log-logistic distribution as an example of how to simulate a trial with an arbitrary time-to-event distribution.
We begin by showing piecewise constant hazard rates that can be used to approximate this distribution.

```{r}
set.seed(123)

# Log-logistic survival function: S(x) = 1 / (1 + (x / alpha)^beta).
# Despite the "d" prefix, this returns a survival probability, not a density.
#   x      Numeric vector of times.
#   alpha  Scale parameter (default 1).
#   beta   Shape parameter (default 4).
dloglogis <- function(x, alpha = 1, beta = 4) {
  scaled_power <- (x / alpha)^beta
  1 / (1 + scaled_power)
}
# Interval end points: 150 steps of width 0.02, ending at time 3.
times <- seq_len(150) / 50

# Cumulative hazard at each end point, H(t) = -log(S(t)).
cum_hazard <- -log(dloglogis(times, alpha = .5, beta = 4))
# Width of each interval; the first interval starts at time 0.
interval_width <- diff(c(0, times))

# One row per interval: its width and its piecewise constant rate.
# NOTE(review): the final "/ 3" rescales the increment in cumulative hazard
# per unit time; confirm that this scaling is intended.
xx <- data.frame(
  duration = interval_width,
  rate = diff(c(0, cum_hazard)) / interval_width / 3
)

# Plot each rate against the start time of its interval.
hazard_steps <- xx |> mutate(Time = lag(cumsum(duration), default = 0))
ggplot(hazard_steps, aes(x = Time, y = rate)) +
  geom_line()
```

We assume the time scale above is in years and that enrollment occurs over the first half year at an even rate of 500 per year.
We assume that observations are censored at an exponential rate of about 5% per year.

```{r}
# Label used for the single treatment group in every input table.
tx <- "Log-logistic"

# Enrollment: a rate of 500 per year over half a year.
enroll_rate <- data.frame(duration = .5, rate = 500)

# Dropout: a single period of duration 3 with a constant rate of 0.05.
dropout_rate <- data.frame(
  treatment = tx,
  duration = 3,
  rate = .05,
  period = 1,
  stratum = "All"
)

# Every entry of the block carries the same treatment label.
block <- rep(tx, 2)

x <- sim_pw_surv(
  n = 250, # Sample size
  block = block,
  enroll_rate = enroll_rate,
  # One failure-rate row per interval of `xx`; `period` numbers the rows.
  fail_rate = xx |> mutate(
    stratum = "All",
    treatment = tx,
    period = seq_len(n())
  ),
  dropout_rate = dropout_rate
)
```

We assume the entire study lasts 3 years, so we cut the data at year 3.

```{r}
# Cut the simulated data at time 3 and preview the first rows.
y <- cut_data_by_date(x, 3)
head(y)
```

Now we estimate a Kaplan-Meier curve.

```{r, fig.height=4, fig.width=7.5}
# Kaplan-Meier fit with no covariates (`~ 1`), using the `tte` and `event`
# columns of the cut data.
fit <- survfit(Surv(tte, event) ~ 1, data = y)
# NOTE(review): `mark` sets the symbol for censoring marks; confirm whether
# `mark.time = TRUE` is also needed for the marks to be drawn.
plot(fit, mark = "|")
```

Finally, we plot the estimated hazard rate and its confidence interval as a function of time.
We overlay the actual rates in red.

```{r, echo=FALSE}
# Load a saved fit from disk; this replaces the Kaplan-Meier `fit` above.
# Presumably the stored result of the bshazard() call shown below -- confirm.
fit <- readRDS("fit-bshazard.rds")

# Plot method for fitted "bshazard" objects: draws the hazard estimate and,
# optionally, a shaded confidence band around it.
#
# Arguments:
#   x                 Object with components `time`, `hazard`, `low`, `up`.
#   conf.int          If TRUE, shade the region between `x$low` and `x$up`.
#   overall           Accepted for compatibility; not used by this method.
#   col, lwd, lty     Line settings for the hazard curve.
#   xlab, ylab        Axis labels.
#   border, col.fill  Outline and fill colour of the confidence band.
#   ...               Passed on to plot(), polygon() and lines().
#
# Returns NULL invisibly; called for its plotting side effect.
plot.bshazard <- function(
    x, conf.int = TRUE, overall = TRUE, col = 1, lwd = 1, lty = 1,
    xlab = "Time", ylab = "Hazard rate", border = NA, col.fill = "lightgrey",
    ...) {
  # Draw the curve first so that the axes and plotting region are set up.
  plot(
    x$time, x$hazard,
    xlab = xlab, type = "l", ylab = ylab,
    lwd = lwd, lty = lty, col = col, ...
  )
  if (isTRUE(conf.int)) {
    # Closed outline: lower limit left to right, upper limit right to left.
    polygon(
      c(x$time, rev(x$time)), c(x$low, rev(x$up)),
      col = col.fill, border = border, ...
    )
    # The filled band paints over the curve, so draw the curve again on top.
    lines(x$time, x$hazard, lwd = 2, lty = lty, col = col, ...)
  }
  invisible(NULL)
}
```

```{r, eval=FALSE}
# Hazard estimate from the cut data with no covariates (`~ 1`).
# `nk` presumably sets the number of spline knots; see ?bshazard::bshazard.
fit <- bshazard::bshazard(Surv(tte, event) ~ 1, data = y, nk = 120)
```

```{r}
# Estimated hazard with its shaded confidence band.
plot(
  fit,
  conf.int = TRUE, xlab = "Time", xlim = c(0, 3), ylim = c(0, 2.5), lwd = 2
)
# Overlay the piecewise rates from `xx`, plotted at the interval end points.
lines(x = times, y = xx$rate, col = 2)
```