Explore clusters

Author

Alexander Lawless

Published

17-03-2025

expand for setup code chunk
knitr::opts_chunk$set(echo = FALSE, warning=FALSE, message=FALSE, fig.width =12, fig.height = 9)

library(tidyverse)
library(patchwork)
library(DT)
library(sf)

# Set SU theme ----
SU_colours <- c(
  `orange`                     = grDevices::rgb(248, 191, 7, maxColorValue = 255), # "#f9bf07",
  `charcoal`                   = grDevices::rgb(44, 40, 37, maxColorValue = 255), # "#2c2825",
  `slate`                      = grDevices::rgb(104, 111, 115, maxColorValue = 255), # "#686f73",
  `blue`                       = grDevices::rgb(88, 29, 193, maxColorValue = 255), # "#5881c1",
  `red`                        = grDevices::rgb(236, 101, 85, maxColorValue = 255), # "#ec6555",
  # additional accent colours from word doc template
  `yellow`                     = grDevices::rgb(252, 229, 155, maxColorValue = 255),
  `grey`                       = grDevices::rgb(163, 168, 172, maxColorValue = 255),
  `white`                      = grDevices::rgb(255, 255, 255, maxColorValue = 255),
  # light and dark ends from colour theme in word doc
  `light orange`               = grDevices::rgb(253, 242, 205, maxColorValue = 255),
  `dark orange`                = grDevices::rgb(124, 95, 3, maxColorValue = 255),
  `light charcoal`             = grDevices::rgb(235, 233, 231, maxColorValue = 255),
  `dark charcoal`              =        "#000000", # black
  `light slate`                = grDevices::rgb(224, 226, 227, maxColorValue = 255),
  `dark slate`                 = grDevices::rgb(51, 55, 57, maxColorValue = 255),
  `light blue`                 = grDevices::rgb(221, 229, 242, maxColorValue = 255),
  `dark blue`                  = grDevices::rgb(38, 61, 102, maxColorValue = 255),
  `light red`                  = grDevices::rgb(251, 224, 220, maxColorValue = 255),
  `dark red`                   = grDevices::rgb(144, 29, 16, maxColorValue = 255),
  `light yellow`               = grDevices::rgb(254, 249, 235, maxColorValue = 255),
  `dark yellow`                = grDevices::rgb(197, 152, 5, maxColorValue = 255),
  `light grey`                 = grDevices::rgb(236, 237, 238, maxColorValue = 255),
  `dark grey`                  = grDevices::rgb(79, 84, 88, maxColorValue = 255),
  `light white`                = grDevices::rgb(242, 242, 242, maxColorValue = 255),
  `dark white`                 = grDevices::rgb(127, 127, 127, maxColorValue = 255),
  `red2`                       = grDevices::rgb(215, 25, 28, maxColorValue = 255),
  `orange2`                    = grDevices::rgb(253, 174, 97, maxColorValue = 255),
  `yellow2`                    = grDevices::rgb(255, 255, 191, maxColorValue = 255),
  `green2`                     = grDevices::rgb(171, 221, 164, maxColorValue = 255),
  `blue2`                      = grDevices::rgb(43, 131, 186, maxColorValue = 255) # "#2b83ba"
)

SU_cols <- function(...) {
  cols <- c(...)
  
  if (is.null(cols)) {
    return(SU_colours)
  }
  
  SU_colours[cols]
}

SU_palettes <- list(
  `main` = SU_cols("orange", "charcoal", "slate", "blue", "red"),
  `oranges` = SU_cols("light orange", "orange", "dark orange"),
  `slates` = SU_cols("light slate", "slate", "dark slate"),
  `mixed` = SU_cols("dark red", "orange", "yellow", "light blue", "slate"),
  `oj_coal` = SU_cols("yellow", "orange", "red", "dark red", "dark charcoal"),
  `oj_red` = SU_cols("yellow", "orange", "red", "dark red"),
  `white_oj_coal` = SU_cols("white", "yellow", "orange", "red", "dark red", "dark charcoal"), # added since shared
  `lyellow_oj_coal` = SU_cols("light yellow", "orange", "red", "dark red", "dark charcoal"), # added since shared
  `wy_oj_coal` = SU_cols("white", "light yellow", "yellow", "orange", "red", "dark red", "charcoal", "dark charcoal"),
  `red_coal` = SU_cols("red", "dark red", "charcoal", "dark charcoal"),
  `blue_yellow_red` = SU_cols("red2", "orange2", "yellow2", "green2", "blue2"),
  `red_yellow_blue` = SU_cols("blue2", "green2", "yellow2", "orange2", "red2")
)


SU_pal <- function(palette = "main", reverse = FALSE, ...) {
  pal <- SU_palettes[[palette]]
  
  if (reverse) pal <- rev(pal)
  
  colorRampPalette(pal, ...)
}


scale_color_SU <- function(palette = "main", discrete = TRUE, reverse = FALSE, ...) {
  pal <- SU_pal(palette = palette, reverse = reverse)
  
  if (discrete) {
    discrete_scale("colour", paste0("SU_", palette), palette = pal, ...)
  } else {
    scale_color_gradientn(colours = pal(256), ...)
  }
}

scale_fill_SU <- function(palette = "main", discrete = TRUE, reverse = FALSE, ...) {
  pal <- SU_pal(palette = palette, reverse = reverse)
  
  if (discrete) {
    discrete_scale("fill", paste0("SU_", palette), palette = pal, ...)
  } else {
    scale_fill_gradientn(colours = pal(256), ...)
  }
}

theme_SU <- function(base_size) {
  theme_minimal(
    # base_family = "Segoe UI",
    base_size = 12
  ) %+replace%
    theme(
      axis.title = element_text(size = 11, face = "bold", colour = SU_cols("charcoal")),
      plot.title = element_text(hjust = 0, face = "bold", size = 12, colour = SU_cols("charcoal"), margin = margin(b = 4, unit = "pt")),
      plot.subtitle = element_text(hjust = 0, face = "italic", size = 10, colour = SU_cols("charcoal"), margin = margin(b = 4, unit = "pt")),
      plot.caption = element_text(hjust = 0, face = "italic", size = 9, colour = SU_cols("slate"), margin = margin(b = 4, unit = "pt")),
      legend.text = element_text(size = 10, colour = SU_cols("charcoal")),
      legend.title = element_text(face = "bold", size = 11, colour = SU_cols("charcoal"), margin = margin(b = 4, unit = "pt"))
    )
}

theme_set(theme_SU())

# Read in model classifications and demographic/key-measure info ----
clust_ass_demogra_key_meas <-
  read_csv("cluster_assignments_demographics_key_measures.csv") |>
  mutate(
    AgeRange =
      factor(AgeRange,
             levels = c("10-19",
                        "20-29",
                         "30-39",
                         "40-49",
                         "50-59",
                         "60-69",
                         "70-79",
                         "80-89",
                         "90-99",
                         "100+")),
    imd_quintile = as.character(imd_quintile),
    EthnicGroup =
      factor(EthnicGroup,
             levels = c("White",
                        "Black or Black British",
                        "Asian or Asian British",
                        "Mixed",
                        "Other",
                        "Not stated",
                        "Unknown")),
    team_input_count = as.numeric(team_input_count),         # nulls = no data so NA
    avg_contact_duration = as.numeric(avg_contact_duration), # nulls = no data so NA
    intermittent_care_periods = case_when(intermittent_care_periods == "null" ~ 0,
                                          TRUE ~ as.numeric(intermittent_care_periods)),   # nulls = no periods so 0
    acute_admissions_2223 =  case_when(acute_admissions_2223 == "null" ~ 0,
                                       TRUE ~ as.numeric(acute_admissions_2223))           # nulls = no admissions so 0
    )

# Set selected model ----
selected_model_col <- clust_ass_demogra_key_meas$km8_clust

# Functions ----

summary_table_func <- function(data) {
  
  # N-patients
  n_patients <-
    data|>
    group_by({{selected_model_col}}) |>
    summarise(n_patients = n_distinct(person_id)) |>
    rename(cluster = 1) |>
    mutate(patient_prop = n_patients/sum(n_patients)*100)  
  
  # Gender split
  gender <-
    data|>
    group_by({{selected_model_col}}, Gender) |>
    summarise(n_patients = n_distinct(person_id)) |>
    rename(cluster = 1) |>
    pivot_wider(id_cols = cluster, names_from = Gender, values_from = n_patients) |>
    mutate(Female_prop = Female/sum(Female, Male)*100,
           Male_prop = Male/sum(Female, Male)*100) 
  
  # Median age
  age_range_numeric <-
    clust_ass_demogra_key_meas |>
    select(AgeRange) |>
    distinct() |>
    arrange(AgeRange) |>
    mutate(AgeRange_num = as.numeric(row_number()))
  
  age <-
    data|>
    left_join(age_range_numeric, by = "AgeRange") |> # add numerical value for age_range
    select(person_id, AgeRange_num) |>
    group_by({{selected_model_col}}) |>
    summarise(med_age_range = median(AgeRange_num)) |>
    rename(cluster = 1) |>
    left_join(age_range_numeric, by = c("med_age_range" = "AgeRange_num")) |>
    select(cluster, AgeRange)
  
  # Care duration
  duration <-
    data |>
    group_by({{selected_model_col}}) |>
    summarise(med_duration = median(length_care)) |>
    rename(cluster = 1) |>
    mutate(med_months = med_duration/30)
  
  # Avg contacts per person
  contacts_person <-
    data |>
    group_by({{selected_model_col}}) |>
    summarise(med_contacts = median(care_contacts),
              med_referrals = median(referrals)
              ) |>
    rename(cluster = 1) ## Check why I limited contacts to less that 50...?
  
  # ... Rest
  others <- # rename ...
    data |>
    group_by({{selected_model_col}}) |>
    summarise(
      prop_aft_midpoint = median(contact_prop_aft_midpoint),
      team_input = median(team_input_count, na.rm = T),
      daily_contacts = median(average_daily_contacts, na.rm = T),
      intermittent_care_periods = median(intermittent_care_periods, na.rm = T),
      acute_admissions = median(acute_admissions_2223, na.rm = T),
      remote_care_prop = median(remote_contact_prop, na.rm = T),
      specialist_care_prop = median(specialist_contact_prop, na.rm = T)
    ) |>
    rename(cluster = 1)
  
  # combine
  comb_table <-
    n_patients |>
    left_join(gender, by = "cluster") |>
    left_join(age, by = "cluster") |>
    left_join(duration, by = "cluster") |>
    left_join(contacts_person, by = "cluster") |>
    left_join(others, by = "cluster")
  
  comb_table
  
}

create_dt <- function(x) {
    
    DT::datatable(
        x
        , extensions = "Buttons"
        , options = list(
            dom = "Blfrtip"
            , buttons = c("copy", "csv")
            , lengthMenu = list(
                c(10, 25, 50, -1)
                , c(10, 25, 50, "All")
                ),
            width = "400px",  # Specify the width
            height = "200px" # Specify the height
            )
        )
}

## Display clusters by demographic 
demographic_vis <- function(grouping_var, title_group, legend_title) {

  a <-
    clust_ass_demogra_key_meas |>
    group_by({{grouping_var}}) |>
    summarise(n = n_distinct(person_id)) |>
    mutate(prop = round(n/sum(n)*100,1)) |>

    ggplot(aes(x = prop,
               y = "All pts",
               fill = fct_rev({{grouping_var}}),
               label = prop)
    ) +
    geom_col(position="stack") +
    geom_label(hjust = 0.5, position = position_stack(vjust = 0.5), size = 4) +
    scale_fill_brewer(palette = "Paired") +
    theme_minimal() +
    theme(legend.position = "none",
          axis.title = element_blank()
    ) +
    labs(title = paste0("Distibution of ", title_group, " in sample and clusters"),
         subtitle = "Clustering method: Density based model - dbscan")


  b <-
    clust_ass_demogra_key_meas |>
    group_by({{selected_model_col}}, {{grouping_var}}) |>
    summarise(n = n_distinct(person_id)) |>
    mutate(prop = round(n/sum(n)*100,1)) |>
    rename(cluster = 1) |>

    ggplot(aes(x = prop,
               y = reorder(as.character(cluster), -cluster),
               fill = fct_rev({{grouping_var}}),
               label = prop)
    ) +
    geom_col(position="stack") +
    geom_label(hjust = 0.5, position = position_stack(vjust = 0.5), size = 4, show.legend = FALSE) +
    scale_fill_brewer(palette = "Paired") +
    theme_minimal() +
    labs(x = "Percent (%)",
         y = "Cluster",
         fill = legend_title
    )

  a / b + plot_layout(heights = c(0.2, 0.8))


  }


# Lookups ----

icb_23_lookup <- st_read("https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/SICBL23_ICB23_NHSER23_EN_LU/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson", quiet = TRUE)

icb_shp_23 <- st_read("https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/Integrated_Care_Boards_April_2023_EN_BGC/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson", quiet = TRUE)

Having selected the most appropriate cluster model, we explore the nature of each cluster with regards to service use and demographic distribution.

Cluster details

Summary table

First we display the average demographic and service usage values from our key measures by cluster, in tabular form:

Patients and activity per cluster:

Examine demographic distribution

ICB map

Visualise clusters by key measures