Minimising the rate of missing values (NAs) is one of the main objectives of ReSECT data quality control. This document presents the main relationships between NAs and quantifies the number of NAs per centre for certain fields considered key to the anatomical lung resection process. This information complements the filters built into the platform, which should be used to determine which cases need to be reviewed.
For the platform's built-in filters to detect missing values, it is NECESSARY that ALL forms are activated at the time each patient is included in the anatomical lung resection process. A form is activated by completing its first variable, regardless of whether the form is fully or only partially completed at that time.
Pre-OP
Code
# Pre-operative fields: UpSet plot of which fields are missing together.
# DLCO is recoded to "-" when `realizacion_de_difusion` is "No", so those
# rows are not counted as missing.
datosClinicos_rpa %>%
  mutate(
    dlco_percent_ = ifelse(realizacion_de_difusion == "No", "-", dlco_percent)
  ) %>%
  select(
    Hospital = hospital,
    Gender = sexo,
    BMI = indice_de_masa_corporal,
    Smoking = tabaquismo,
    Comorbidities = comorbilidades,
    `FEV1(%)` = fev1_percent,
    `DLCO (%)` = dlco_percent_,
    ASA = riesgo_asa,
    ECOG = ecog,
    Dyspnea = grado_de_disnea,
    `Previous Thoracic Surg,` = cirugia_toracica_previa
  ) %>%
  gg_miss_upset(nsets = 10)
Code
# Pre-operative fields: number of NAs per hospital, drawn as a heatmap.

# NA count per hospital for each selected field. DLCO is recoded to "-" when
# `realizacion_de_difusion` is "No", so those rows are not counted as missing.
nas_preop <- datosClinicos_rpa %>%
  mutate(
    dlco_percent_ = ifelse(realizacion_de_difusion == "No", "-", dlco_percent)
  ) %>%
  select(
    Hospital = hospital,
    Gender = sexo,
    BMI = indice_de_masa_corporal,
    Smoking = tabaquismo,
    Comorbidities = comorbilidades,
    `FEV1(%)` = fev1_percent,
    `DLCO (%)` = dlco_percent_,
    ASA = riesgo_asa,
    ECOG = ecog,
    Dyspnea = grado_de_disnea,
    `Previous Thoracic Surg,` = cirugia_toracica_previa
  ) %>%
  group_by(Hospital) %>%
  summarise_all(~ sum(is.na(.)))

# Number of patients per hospital, printed on the plot as context.
pacientes_preop <- datosClinicos_rpa %>%
  count(Hospital = hospital)

nas_preop %>%
  pivot_longer(!Hospital, names_to = "Variable", values_to = "nas") %>%
  full_join(pacientes_preop, by = "Hospital") %>%
  group_by(Hospital) %>%
  # Keep the patient count on the first row of each hospital only, so the
  # label is drawn once per hospital.
  mutate(n = ifelse(duplicated(n), NA, n)) %>%
  ungroup() %>%
  ggplot(aes(
    x = Hospital,
    # Levels must match the names given in select() above; unknown levels
    # are not reordered by fct_relevel().
    y = fct_relevel(
      Variable, "Gender", "BMI", "Smoking", "Comorbidities",
      "FEV1(%)", "DLCO (%)", "ASA", "ECOG", "Dyspnea",
      "Previous Thoracic Surg,"
    ),
    fill = nas
  )) +
  geom_tile() +
  geom_text(aes(label = n), color = "white", size = 1.8, nudge_y = -0.3) +
  coord_flip() +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90),
    axis.text.y = element_text(size = 7, margin = margin(r = 0)),
    plot.caption = element_text(face = "italic", color = "steelblue", size = 6)
  ) +
  labs(
    fill = "Nº NAs", y = NULL, x = NULL,
    caption = "\nThe color of the tiles should be interpreted in conjunction with the number
of patients recruited by each department used to compute the number of missing values in this plot"
  )
Code
# Pre-operative fields: percentage of NAs per hospital, drawn as a heatmap.

# NA count per hospital for each selected field. DLCO is recoded to "-" when
# `realizacion_de_difusion` is "No", so those rows are not counted as missing.
nas_preop <- datosClinicos_rpa %>%
  mutate(
    dlco_percent_ = ifelse(realizacion_de_difusion == "No", "-", dlco_percent)
  ) %>%
  select(
    Hospital = hospital,
    Gender = sexo,
    BMI = indice_de_masa_corporal,
    Smoking = tabaquismo,
    Comorbidities = comorbilidades,
    `FEV1(%)` = fev1_percent,
    `DLCO (%)` = dlco_percent_,
    ASA = riesgo_asa,
    ECOG = ecog,
    Dyspnea = grado_de_disnea,
    `Previous Thoracic Surg,` = cirugia_toracica_previa
  ) %>%
  group_by(Hospital) %>%
  summarise_all(~ sum(is.na(.)))

# Number of patients per hospital (denominator of the percentages).
pacientes <- datosClinicos_rpa %>%
  count(Hospital = hospital)

# `n` is appended last by the join, so it still holds the raw count while
# the preceding columns are scaled; it is dropped afterwards.
percent_nas_preop <- inner_join(x = nas_preop, y = pacientes, by = "Hospital") %>%
  mutate_if(is.numeric, ~ . * 100 / n) %>%
  select(-n)

percent_nas_preop %>%
  pivot_longer(!Hospital, names_to = "Variable", values_to = "percent_nas") %>%
  ggplot(aes(
    x = Hospital,
    # Levels must match the names given in select() above; unknown levels
    # are not reordered by fct_relevel().
    y = fct_relevel(
      Variable, "Gender", "BMI", "Smoking", "Comorbidities",
      "FEV1(%)", "DLCO (%)", "ASA", "ECOG", "Dyspnea",
      "Previous Thoracic Surg,"
    ),
    fill = percent_nas
  )) +
  geom_tile() +
  coord_flip() +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90),
    axis.text.y = element_text(size = 7)
  ) +
  labs(fill = "% NAs", y = NULL, x = NULL)
Surgery
Code
# Surgery fields: UpSet plot of which fields are missing together.
# `conversion` is recoded to "-" unless the final approach is "Toracotomía",
# so those rows are not counted as missing.
# NOTE(review): `lobectomia` is recoded the same way but is not selected
# below — confirm whether it should be part of the report.
datosClinicos_rpa %>%
  mutate(
    conversion = ifelse(abordaje_final != "Toracotomía", "-", conversion),
    lobectomia = ifelse(procedimiento_pulmonar != "Lobectomía", "-", lobectomia)
  ) %>%
  select(
    Hospital = hospital,
    Age = edad_del_paciente_a_la_fecha_de_intervencion,
    Procedure = procedimiento_pulmonar,
    Approach = abordaje_final,
    Lymphad. = tipo_de_linfadenectomia_hilio_mediastinica,
    Conversion = conversion,
    `Functioning Seg.` = numero_de_segmentos_funcionantes_resecados
  ) %>%
  gg_miss_upset(nsets = 8)
Code
# Surgery fields: number of NAs per hospital, drawn as a heatmap.

# NA count per hospital for each selected field. `conversion` is recoded to
# "-" unless the final approach is "Toracotomía", so those rows are not
# counted as missing.
# NOTE(review): `lobectomia` is recoded the same way but is not selected
# below — confirm whether it should be part of the report.
nas_cirugia <- datosClinicos_rpa %>%
  mutate(
    conversion = ifelse(abordaje_final != "Toracotomía", "-", conversion),
    lobectomia = ifelse(procedimiento_pulmonar != "Lobectomía", "-", lobectomia)
  ) %>%
  select(
    Hospital = hospital,
    Age = edad_del_paciente_a_la_fecha_de_intervencion,
    Procedure = procedimiento_pulmonar,
    Approach = abordaje_final,
    Lymphad. = tipo_de_linfadenectomia_hilio_mediastinica,
    Conversion = conversion,
    `Functioning Seg.` = numero_de_segmentos_funcionantes_resecados
  ) %>%
  group_by(Hospital) %>%
  summarise_all(~ sum(is.na(.)))

# Number of patients per hospital, printed on the plot as context.
pacientes_cirugia <- datosClinicos_rpa %>%
  count(Hospital = hospital)

nas_cirugia %>%
  pivot_longer(!Hospital, names_to = "Variable", values_to = "nas") %>%
  full_join(pacientes_cirugia, by = "Hospital") %>%
  group_by(Hospital) %>%
  # Keep the patient count on the first row of each hospital only, so the
  # label is drawn once per hospital.
  mutate(n = ifelse(duplicated(n), NA, n)) %>%
  ungroup() %>%
  ggplot(aes(
    x = Hospital,
    y = fct_relevel(
      Variable, "Age", "Procedure", "Approach", "Conversion",
      "Lymphad.", "Functioning Seg."
    ),
    fill = nas
  )) +
  geom_tile() +
  geom_text(aes(label = n), color = "white", size = 1.8, nudge_y = -0.4) +
  coord_flip() +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90),
    axis.text.y = element_text(size = 7, margin = margin(r = 0)),
    plot.caption = element_text(face = "italic", color = "steelblue", size = 6)
  ) +
  labs(
    fill = "Nº NAs", y = NULL, x = NULL,
    caption = "\nThe color of the tiles should be interpreted in conjunction with the number
of patients recruited by each department used to compute the number of missing values in this plot"
  )
Code
# Surgery fields: percentage of NAs per hospital, drawn as a heatmap.

# NA count per hospital for each selected field. `conversion` is recoded to
# "-" unless the final approach is "Toracotomía", so those rows are not
# counted as missing.
# NOTE(review): `lobectomia` is recoded the same way but is not selected
# below — confirm whether it should be part of the report.
nas_cirugia <- datosClinicos_rpa %>%
  mutate(
    conversion = ifelse(abordaje_final != "Toracotomía", "-", conversion),
    lobectomia = ifelse(procedimiento_pulmonar != "Lobectomía", "-", lobectomia)
  ) %>%
  select(
    Hospital = hospital,
    Age = edad_del_paciente_a_la_fecha_de_intervencion,
    Procedure = procedimiento_pulmonar,
    Approach = abordaje_final,
    Lymphad. = tipo_de_linfadenectomia_hilio_mediastinica,
    Conversion = conversion,
    `Functioning Seg.` = numero_de_segmentos_funcionantes_resecados
  ) %>%
  group_by(Hospital) %>%
  summarise_all(~ sum(is.na(.)))

# Number of patients per hospital (denominator of the percentages).
pacientes <- datosClinicos_rpa %>%
  count(Hospital = hospital)

# `n` is appended last by the join, so it still holds the raw count while
# the preceding columns are scaled; it is dropped afterwards.
percent_nas_cirugia <- inner_join(x = nas_cirugia, y = pacientes, by = "Hospital") %>%
  mutate_if(is.numeric, ~ . * 100 / n) %>%
  select(-n)

percent_nas_cirugia %>%
  pivot_longer(!Hospital, names_to = "Variable", values_to = "percent_nas") %>%
  ggplot(aes(
    x = Hospital,
    y = fct_relevel(
      Variable, "Age", "Procedure", "Approach", "Conversion",
      "Lymphad.", "Functioning Seg."
    ),
    fill = percent_nas
  )) +
  geom_tile() +
  coord_flip() +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90),
    axis.text.y = element_text(size = 7)
  ) +
  labs(fill = "% NAs", y = NULL, x = NULL)
Post-OP
Code
# Post-operative fields: UpSet plot of which fields are missing together.
# The complication grade is recoded to "-" when `complicaciones_postoperatorias`
# is "No", so those rows are not counted as missing.
datosClinicos_rpa %>%
  mutate(
    grado = ifelse(
      complicaciones_postoperatorias == "No",
      "-",
      grado_complicaciones_postoperatorias
    )
  ) %>%
  select(
    Hospital = hospital,
    `Postop Care` = cuidados_inmediatos_postop,
    `Non-Expected ICU` = ingreso_no_esperado_en_cuidados_intermedios_o_intensivos,
    Reintervention = reintervencion_quirurgica,
    Complications = complicaciones_postoperatorias,
    `Clavien-Dindo` = grado,
    `Discharge Date` = fecha_de_alta,
    `Discharge Status` = estado_de_alta
  ) %>%
  gg_miss_upset(nsets = 7)
Code
# Post-operative fields: number of NAs per hospital, drawn as a heatmap.

# NA count per hospital for each selected field. The complication grade is
# recoded to "-" when `complicaciones_postoperatorias` is "No", so those rows
# are not counted as missing.
nas_postop <- datosClinicos_rpa %>%
  mutate(
    grado = ifelse(
      complicaciones_postoperatorias == "No",
      "-",
      grado_complicaciones_postoperatorias
    )
  ) %>%
  select(
    Hospital = hospital,
    `Postop Care` = cuidados_inmediatos_postop,
    `Non-Expected ICU` = ingreso_no_esperado_en_cuidados_intermedios_o_intensivos,
    Reintervention = reintervencion_quirurgica,
    Complications = complicaciones_postoperatorias,
    `Clavien-Dindo` = grado,
    `Discharge Date` = fecha_de_alta,
    `Discharge Status` = estado_de_alta
  ) %>%
  group_by(Hospital) %>%
  summarise_all(~ sum(is.na(.)))

# Number of patients per hospital, printed on the plot as context.
pacientes_postop <- datosClinicos_rpa %>%
  count(Hospital = hospital)

nas_postop %>%
  pivot_longer(!Hospital, names_to = "Variable", values_to = "nas") %>%
  full_join(pacientes_postop, by = "Hospital") %>%
  group_by(Hospital) %>%
  # Keep the patient count on the first row of each hospital only, so the
  # label is drawn once per hospital.
  mutate(n = ifelse(duplicated(n), NA, n)) %>%
  ungroup() %>%
  ggplot(aes(
    x = Hospital,
    y = fct_relevel(
      Variable, "Postop Care", "Non-Expected ICU", "Reintervention",
      "Complications", "Clavien-Dindo", "Discharge Date", "Discharge Status"
    ),
    fill = nas
  )) +
  geom_tile() +
  geom_text(aes(label = n), color = "white", size = 1.8, nudge_y = -0.35) +
  coord_flip() +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90),
    axis.text.y = element_text(size = 7, margin = margin(r = 0)),
    plot.caption = element_text(face = "italic", color = "steelblue", size = 6)
  ) +
  labs(
    fill = "Nº NAs", y = NULL, x = NULL,
    caption = "\nThe color of the tiles should be interpreted in conjunction with the number
of patients recruited by each department used to compute the number of missing values in this plot"
  )
Code
# Post-operative fields: percentage of NAs per hospital, drawn as a heatmap.

# NA count per hospital for each selected field. The complication grade is
# recoded to "-" when `complicaciones_postoperatorias` is "No", so those rows
# are not counted as missing.
nas_postop <- datosClinicos_rpa %>%
  mutate(
    grado = ifelse(
      complicaciones_postoperatorias == "No",
      "-",
      grado_complicaciones_postoperatorias
    )
  ) %>%
  select(
    Hospital = hospital,
    `Postop Care` = cuidados_inmediatos_postop,
    `Non-Expected ICU` = ingreso_no_esperado_en_cuidados_intermedios_o_intensivos,
    Reintervention = reintervencion_quirurgica,
    Complications = complicaciones_postoperatorias,
    `Clavien-Dindo` = grado,
    `Discharge Date` = fecha_de_alta,
    `Discharge Status` = estado_de_alta
  ) %>%
  group_by(Hospital) %>%
  summarise_all(~ sum(is.na(.)))

# Number of patients per hospital (denominator of the percentages).
pacientes <- datosClinicos_rpa %>%
  count(Hospital = hospital)

# `n` is appended last by the join, so it still holds the raw count while
# the preceding columns are scaled; it is dropped afterwards.
percent_nas_postop <- inner_join(x = nas_postop, y = pacientes, by = "Hospital") %>%
  mutate_if(is.numeric, ~ . * 100 / n) %>%
  select(-n)

percent_nas_postop %>%
  pivot_longer(!Hospital, names_to = "Variable", values_to = "percent_nas") %>%
  ggplot(aes(
    x = Hospital,
    y = fct_relevel(
      Variable, "Postop Care", "Non-Expected ICU", "Reintervention",
      "Complications", "Clavien-Dindo", "Discharge Date", "Discharge Status"
    ),
    fill = percent_nas
  )) +
  geom_tile() +
  coord_flip() +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90),
    axis.text.y = element_text(size = 7)
  ) +
  labs(fill = "% NAs", y = NULL, x = NULL)
Diagnosis
Code
# Diagnosis fields: UpSet plot of which fields are missing together,
# restricted to primary malignant tumours that are not a recurrence of an
# already known primary malignant tumour.
# NOTE(review): `nsets = 7` is smaller than the number of selected columns
# (10) — confirm that limiting the plot to 7 sets is intended.
datosClinicos_rpa %>%
  filter(
    grupo_diagnostico == "Tumoral Maligno Primario" &
      caracter_de_la_enfermedad != "Recurrencia de un Tumor Maligno Primario ya conocido"
  ) %>%
  select(
    Hospital = hospital,
    Histology = diagnostico_histologico,
    Location = localizacion_tumoral,
    `Nc CT-Scan` = descriptor_n_segun_tac,
    `Invasive Staging` = estadificacion_invasiva_ganglionar,
    Neoadjuvant = neoadyuvancia,
    Tp = descriptor_t_patologico,
    Np = descriptor_n_patologico,
    Mp = descriptor_m_patologico,
    `Complete Resection` = reseccion_completa
  ) %>%
  gg_miss_upset(nsets = 7)
Code
# Diagnosis fields: number of NAs per hospital, drawn as a heatmap.

# NA count of the diagnosis group itself, over all patients.
nas_grupodx <- datosClinicos_rpa %>%
  select(
    Hospital = hospital,
    `Diagnosis Group` = grupo_diagnostico
  ) %>%
  group_by(Hospital) %>%
  summarise_all(~ sum(is.na(.)))

# Number of patients per hospital, printed on the plot as context.
pacientes_grupodx <- datosClinicos_rpa %>%
  count(Hospital = hospital, name = "n_grupodx")

# NA counts of the remaining fields, restricted to primary malignant tumours
# that are not a recurrence of an already known primary malignant tumour.
nas_dx <- datosClinicos_rpa %>%
  filter(
    grupo_diagnostico == "Tumoral Maligno Primario" &
      caracter_de_la_enfermedad != "Recurrencia de un Tumor Maligno Primario ya conocido"
  ) %>%
  select(
    Hospital = hospital,
    Histology = diagnostico_histologico,
    Location = localizacion_tumoral,
    `Nc CT-Scan` = descriptor_n_segun_tac,
    `Invasive Staging` = estadificacion_invasiva_ganglionar,
    Neoadjuvant = neoadyuvancia,
    Tp = descriptor_t_patologico,
    Np = descriptor_n_patologico,
    Mp = descriptor_m_patologico,
    `Complete Resection` = reseccion_completa
  ) %>%
  group_by(Hospital) %>%
  summarise_all(~ sum(is.na(.)))

nas_dx %>%
  full_join(nas_grupodx, by = "Hospital") %>%
  # Hospitals absent from one side of the join get NA counts; show them as 0.
  mutate_all(.funs = ~ replace(., is.na(.), 0)) %>%
  pivot_longer(!Hospital, names_to = "Variable", values_to = "nas") %>%
  full_join(pacientes_grupodx, by = "Hospital") %>%
  group_by(Hospital) %>%
  # Keep the patient count on the first row of each hospital only, so the
  # label is drawn once per hospital.
  mutate(n = ifelse(duplicated(n_grupodx), NA, n_grupodx)) %>%
  ungroup() %>%
  ggplot(aes(
    x = Hospital,
    y = fct_relevel(
      factor(Variable), "Diagnosis Group", "Histology", "Location",
      "Nc CT-Scan", "Invasive Staging", "Neoadjuvant", "Tp", "Np", "Mp",
      "Complete Resection"
    ),
    fill = nas
  )) +
  geom_tile() +
  geom_text(aes(label = n), color = "white", size = 1.8, nudge_y = -1.3) +
  coord_flip() +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90),
    axis.text.y = element_text(size = 7, margin = margin(r = -20)),
    plot.caption = element_text(face = "italic", color = "steelblue", size = 6)
  ) +
  labs(
    fill = "Nº NAs", y = NULL, x = NULL,
    caption = "\nThe color of the tiles should be interpreted in conjunction with the number
of patients recruited by each department used to compute the number of missing values in this plot. \n
All characteristics, except ‘Diagnosis Group’, refer to patients with primary malignant
tumors that were not recurrence of a previous tumor."
  )
Code
# Diagnosis fields: percentage of NAs per hospital, drawn as a heatmap.

# Diagnosis group variable: NA count over all patients.
nas_grupodx <- datosClinicos_rpa %>%
  select(
    Hospital = hospital,
    `Diagnosis Group` = grupo_diagnostico
  ) %>%
  group_by(Hospital) %>%
  summarise_all(~ sum(is.na(.)))

pacientes_grupodx <- datosClinicos_rpa %>%
  count(Hospital = hospital, name = "n_grupodx")

percent_nas_grupodx <- full_join(nas_grupodx, pacientes_grupodx, by = "Hospital") %>%
  mutate(`Diagnosis Group` = 100 * `Diagnosis Group` / n_grupodx) %>%
  select(-n_grupodx)

# Variables for primary malignant tumours (no recurrence).
nas_dx <- datosClinicos_rpa %>%
  filter(
    grupo_diagnostico == "Tumoral Maligno Primario" &
      caracter_de_la_enfermedad != "Recurrencia de un Tumor Maligno Primario ya conocido"
  ) %>%
  select(
    Hospital = hospital,
    Histology = diagnostico_histologico,
    Location = localizacion_tumoral,
    `Nc CT-Scan` = descriptor_n_segun_tac,
    `Invasive Staging` = estadificacion_invasiva_ganglionar,
    Neoadjuvant = neoadyuvancia,
    Tp = descriptor_t_patologico,
    Np = descriptor_n_patologico,
    Mp = descriptor_m_patologico,
    `Complete Resection` = reseccion_completa
  ) %>%
  group_by(Hospital) %>%
  summarise_all(~ sum(is.na(.)))

# Denominator for these variables: patients passing the same filter.
pacientes_dx <- datosClinicos_rpa %>%
  filter(
    grupo_diagnostico == "Tumoral Maligno Primario" &
      caracter_de_la_enfermedad != "Recurrencia de un Tumor Maligno Primario ya conocido"
  ) %>%
  count(Hospital = hospital, name = "n_dx")

# `n_dx` is appended last by the join, so it still holds the raw count while
# the preceding columns are scaled; it is dropped afterwards.
percent_nas_dx <- full_join(x = nas_dx, y = pacientes_dx, by = "Hospital") %>%
  mutate_if(is.numeric, ~ . * 100 / n_dx) %>%
  select(-n_dx)

# Join both sets of percentages; hospitals absent from one side get 0.
percent_nas <- percent_nas_grupodx %>%
  full_join(percent_nas_dx, by = "Hospital") %>%
  mutate_all(.funs = ~ replace(., is.na(.), 0))

percent_nas %>%
  pivot_longer(!Hospital, names_to = "Variable", values_to = "percent_nas") %>%
  ggplot(aes(
    x = Hospital,
    y = fct_relevel(
      factor(Variable), "Diagnosis Group", "Histology", "Location",
      "Nc CT-Scan", "Invasive Staging", "Neoadjuvant", "Tp", "Np", "Mp",
      "Complete Resection"
    ),
    fill = percent_nas
  )) +
  geom_tile() +
  coord_flip() +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90),
    axis.text.y = element_text(size = 7),
    plot.caption = element_text(face = "italic", color = "steelblue", size = 6)
  ) +
  labs(
    fill = "% NAs", y = NULL, x = NULL,
    caption = "All characteristics, except ‘Diagnosis Group’, refer to patients with primary malignant
tumors that were not recurrence of a previous tumor."
  )
Short-Term Follow-Up
Code
# Short-term follow-up fields: UpSet plot of which fields are missing
# together, excluding patients whose complication grade is "Grado V = Exitus".
# `%in%` is used instead of `!=` so that rows with a missing grade are kept:
# `filter()` drops rows where the condition is NA, which would also remove
# every patient without a recorded grade.
# NOTE(review): assumes the grade is NA when no complication occurred (the
# post-op chunks recode it to "-" in that case) — confirm.
datosClinicos_rpa %>%
  filter(!grado_complicaciones_postoperatorias %in% "Grado V = Exitus") %>%
  select(
    Hospital = hospital,
    Readmission = reingreso_hospitalario,
    `30-day Status` = estado_a_30_dias,
    `90-day Status` = estado_a_90_dias
  ) %>%
  gg_miss_upset(nsets = 7)
Code
# Short-term follow-up fields: number of NAs per hospital, drawn as a heatmap.
# Patients whose complication grade is "Grado V = Exitus" are excluded.
# The grade column is tested (not `complicaciones_postoperatorias`, which the
# post-op chunks compare with "No"), and `%in%` keeps rows with a missing
# grade, which `!=` inside filter() would drop.
# NOTE(review): assumes the grade is NA when no complication occurred —
# confirm.

# NA count per hospital for readmission and 30-/90-day status.
nas_short <- datosClinicos_rpa %>%
  filter(!grado_complicaciones_postoperatorias %in% "Grado V = Exitus") %>%
  select(
    Hospital = hospital,
    Readmission = reingreso_hospitalario,
    `30-day Status` = estado_a_30_dias,
    `90-day Status` = estado_a_90_dias
  ) %>%
  group_by(Hospital) %>%
  summarise_all(~ sum(is.na(.)))

# Number of patients per hospital after the same exclusion.
pacientes_short <- datosClinicos_rpa %>%
  filter(!grado_complicaciones_postoperatorias %in% "Grado V = Exitus") %>%
  count(Hospital = hospital)

nas_short %>%
  pivot_longer(!Hospital, names_to = "Variable", values_to = "nas") %>%
  full_join(pacientes_short, by = "Hospital") %>%
  group_by(Hospital) %>%
  # Keep the patient count on the first row of each hospital only, so the
  # label is drawn once per hospital.
  mutate(n = ifelse(duplicated(n), NA, n)) %>%
  ungroup() %>%
  ggplot(aes(
    x = Hospital,
    y = fct_relevel(
      factor(Variable), "Readmission", "30-day Status", "90-day Status"
    ),
    fill = nas
  )) +
  geom_tile() +
  geom_text(aes(label = n), color = "white", size = 1.8, nudge_y = -0.45) +
  coord_flip() +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90),
    axis.text.y = element_text(size = 7, margin = margin(r = -5)),
    plot.caption = element_text(face = "italic", color = "steelblue", size = 6)
  ) +
  labs(
    fill = "Nº NAs", x = NULL, y = NULL,
    caption = "\nThe color of the tiles should be interpreted in conjunction with the number
of patients recruited by each department used to compute the number of missing values in this plot"
  )
Code
# Short-term follow-up fields: percentage of NAs per hospital, drawn as a
# heatmap. Patients whose complication grade is "Grado V = Exitus" are
# excluded.
# The grade column is tested (not `complicaciones_postoperatorias`, which the
# post-op chunks compare with "No"), and `%in%` keeps rows with a missing
# grade, which `!=` inside filter() would drop.
# NOTE(review): assumes the grade is NA when no complication occurred —
# confirm.

# NA count per hospital for readmission and 30-/90-day status.
nas_short <- datosClinicos_rpa %>%
  filter(!grado_complicaciones_postoperatorias %in% "Grado V = Exitus") %>%
  select(
    Hospital = hospital,
    Readmission = reingreso_hospitalario,
    `30-day Status` = estado_a_30_dias,
    `90-day Status` = estado_a_90_dias
  ) %>%
  group_by(Hospital) %>%
  summarise_all(~ sum(is.na(.)))

# Number of patients per hospital after the same exclusion (denominator).
pacientes_short <- datosClinicos_rpa %>%
  filter(!grado_complicaciones_postoperatorias %in% "Grado V = Exitus") %>%
  count(Hospital = hospital)

nas_short %>%
  full_join(pacientes_short, by = "Hospital") %>%
  mutate_all(.funs = ~ replace(., is.na(.), 0)) %>%
  # `n` is appended last by the join, so it still holds the raw count while
  # the preceding columns are scaled; it is dropped afterwards.
  mutate_if(is.numeric, ~ . * 100 / n) %>%
  select(-n) %>%
  pivot_longer(!Hospital, names_to = "Variable", values_to = "nas") %>%
  ggplot(aes(
    x = Hospital,
    y = fct_relevel(
      factor(Variable), "Readmission", "30-day Status", "90-day Status"
    ),
    fill = nas
  )) +
  geom_tile() +
  coord_flip() +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90),
    axis.text.y = element_text(size = 7),
    plot.caption = element_text(face = "italic", color = "steelblue")
  ) +
  labs(fill = "% NAs", x = NULL, y = NULL)