# Example of a long, monolithic function: structuring the input, joining the
# population data, modelling, predicting and the final tidy-up all live in one
# body, so no single step can be tested on its own.
#
# `data` is the input dataset. The post-processed model estimates are returned.
# do_this(), then_do_that(), and_that(), read_this_file(), join() and
# model_this() are placeholder steps for illustration.
this_long_function <- function(data) {
  # Structure the raw input. The native pipe needs a call on its right-hand
  # side, hence `do_this()` rather than a bare `do_this`.
  data_tmp <-
    data |>
    do_this() |>
    then_do_that() |>
    and_that()

  # Read the population data and join it on
  population_dataset <- read_this_file()

  data_tmp <-
    data_tmp |>
    join(population_dataset)

  # Fit the model and derive estimates from it
  data_model <- model_this(data_tmp)

  data_model_estimates <-
    data_model |>
    predict()

  # Final post-processing of the estimates
  data_final <-
    data_model_estimates |>
    do_this() |>
    then_do_that() |>
    and_that()

  data_final
}
Modularising code
coding-practices
I’ve been working on a project where I’ve had to incorporate better development practices. One issue was the long-winded functions, which needed breaking up into smaller pieces for unit testing and for easier digestion by future developers.
Background
- Working with non-programming but data/stat experts
- Long functions
- No testing
Example function
Issues: How do we easily understand what each step is doing, and how do we ensure that each step is doing it correctly?
Modular code
# First step of the long function, pulled out so it can be tested on its own.
# Takes the input `data` and returns it after the three placeholder steps.
structure_input_data <- function(data) {
  # The native pipe needs a call on its right-hand side, so each step is
  # written with parentheses.
  data |>
    do_this() |>
    then_do_that() |>
    and_that()
}
# Second step of the long function, pulled out so it can be tested on its own.
# Reads the population dataset and returns `data` joined to it.
join_population_data <- function(data) {
  population_dataset <- read_this_file()

  data |>
    join(population_dataset)
}
# ...
# The long function rewritten as a sequence of small, individually testable
# steps. `data` is the input dataset; each helper receives the output of the
# previous step.
this_long_function <- function(data) {
  # Pass the input explicitly: both helpers take a `data` argument
  data_tmp <- structure_input_data(data)

  data_tmp <- join_population_data(data_tmp)

  # ...
}
Testing modular code
Now we can test the functionality and expected outputs of each function:
test_that("structure_input_data works", {
  # A test dataset
  test_data <- data.frame(
    year = c(2022, 2021, 2022, 2021),
    sex = c("male", "male", "female", "female"),
    value = c(4, 3, 2, 1)
  )

  # What is to be expected
  expected_data <- data.frame(
    ReportingValue1 = c(2022, 2021, 2022, 2021),
    ReportingValue2 = c("male", "male", "female", "female"),
    Value = c(4, 3, 2, 1)
  )

  # The output from the function
  result <- structure_input_data(test_data)

  # A number of tests.
  # expect_true() needs a single TRUE, so element-wise `==` comparisons are
  # replaced with expectations that compare whole vectors.
  testthat::expect_named(result, names(expected_data))
  testthat::expect_equal(result$ReportingValue1, test_data$year)
})