`csfmt_rts_data_v1` (`vignette("csfmt_rts_data_v1", package = "cstidy")`) is a data format for real-time surveillance.
d <- cstidy::generate_test_data()
cstidy::set_csfmt_rts_data_v1(d)

# Looking at the dataset
d[]
#> granularity_time granularity_geo country_iso3 location_code border age
#> 1: isoyearweek county nor county_nor42 NA <NA>
#> 2: isoyearweek county nor county_nor32 NA <NA>
#> 3: isoyearweek county nor county_nor33 NA <NA>
#> 4: isoyearweek county nor county_nor56 NA <NA>
#> 5: isoyearweek county nor county_nor34 NA <NA>
#> 6: isoyearweek county nor county_nor15 NA <NA>
#> 7: isoyearweek county nor county_nor18 NA <NA>
#> 8: isoyearweek county nor county_nor03 NA <NA>
#> 9: isoyearweek county nor county_nor11 NA <NA>
#> 10: isoyearweek county nor county_nor40 NA <NA>
#> 11: isoyearweek county nor county_nor55 NA <NA>
#> 12: isoyearweek county nor county_nor50 NA <NA>
#> 13: isoyearweek county nor county_nor39 NA <NA>
#> 14: isoyearweek county nor county_nor46 NA <NA>
#> 15: isoyearweek county nor county_nor31 NA <NA>
#> 16: isoyearweek county nor county_nor42 NA total
#> 17: isoyearweek county nor county_nor32 NA total
#> 18: isoyearweek county nor county_nor33 NA total
#> 19: isoyearweek county nor county_nor56 NA total
#> 20: isoyearweek county nor county_nor34 NA total
#> 21: isoyearweek county nor county_nor15 NA total
#> 22: isoyearweek county nor county_nor18 NA total
#> 23: isoyearweek county nor county_nor03 NA total
#> 24: isoyearweek county nor county_nor11 NA total
#> 25: isoyearweek county nor county_nor40 NA total
#> 26: isoyearweek county nor county_nor55 NA total
#> 27: isoyearweek county nor county_nor50 NA total
#> 28: isoyearweek county nor county_nor39 NA total
#> 29: isoyearweek county nor county_nor46 NA total
#> 30: isoyearweek county nor county_nor31 NA total
#> 31: isoyearweek county nor county_nor42 NA 000_005
#> 32: isoyearweek county nor county_nor32 NA 000_005
#> 33: isoyearweek county nor county_nor33 NA 000_005
#> 34: isoyearweek county nor county_nor56 NA 000_005
#> 35: isoyearweek county nor county_nor34 NA 000_005
#> 36: isoyearweek county nor county_nor15 NA 000_005
#> 37: isoyearweek county nor county_nor18 NA 000_005
#> 38: isoyearweek county nor county_nor03 NA 000_005
#> 39: isoyearweek county nor county_nor11 NA 000_005
#> 40: isoyearweek county nor county_nor40 NA 000_005
#> 41: isoyearweek county nor county_nor55 NA 000_005
#> 42: isoyearweek county nor county_nor50 NA 000_005
#> 43: isoyearweek county nor county_nor39 NA 000_005
#> 44: isoyearweek county nor county_nor46 NA 000_005
#> 45: isoyearweek county nor county_nor31 NA 000_005
#> granularity_time granularity_geo country_iso3 location_code border age
#> sex isoyear isoweek isoyearweek season seasonweek calyear calmonth
#> 1: <NA> 2022 3 2022-03 2021/2022 26 NA NA
#> 2: <NA> 2022 3 2022-03 2021/2022 26 NA NA
#> 3: <NA> 2022 3 2022-03 2021/2022 26 NA NA
#> 4: <NA> 2022 3 2022-03 2021/2022 26 NA NA
#> 5: <NA> 2022 3 2022-03 2021/2022 26 NA NA
#> 6: <NA> 2022 3 2022-03 2021/2022 26 NA NA
#> 7: <NA> 2022 3 2022-03 2021/2022 26 NA NA
#> 8: <NA> 2022 3 2022-03 2021/2022 26 NA NA
#> 9: <NA> 2022 3 2022-03 2021/2022 26 NA NA
#> 10: <NA> 2022 3 2022-03 2021/2022 26 NA NA
#> 11: <NA> 2022 3 2022-03 2021/2022 26 NA NA
#> 12: <NA> 2022 3 2022-03 2021/2022 26 NA NA
#> 13: <NA> 2022 3 2022-03 2021/2022 26 NA NA
#> 14: <NA> 2022 3 2022-03 2021/2022 26 NA NA
#> 15: <NA> 2022 3 2022-03 2021/2022 26 NA NA
#> 16: total 2022 3 2022-03 2021/2022 26 NA NA
#> 17: total 2022 3 2022-03 2021/2022 26 NA NA
#> 18: total 2022 3 2022-03 2021/2022 26 NA NA
#> 19: total 2022 3 2022-03 2021/2022 26 NA NA
#> 20: total 2022 3 2022-03 2021/2022 26 NA NA
#> 21: total 2022 3 2022-03 2021/2022 26 NA NA
#> 22: total 2022 3 2022-03 2021/2022 26 NA NA
#> 23: total 2022 3 2022-03 2021/2022 26 NA NA
#> 24: total 2022 3 2022-03 2021/2022 26 NA NA
#> 25: total 2022 3 2022-03 2021/2022 26 NA NA
#> 26: total 2022 3 2022-03 2021/2022 26 NA NA
#> 27: total 2022 3 2022-03 2021/2022 26 NA NA
#> 28: total 2022 3 2022-03 2021/2022 26 NA NA
#> 29: total 2022 3 2022-03 2021/2022 26 NA NA
#> 30: total 2022 3 2022-03 2021/2022 26 NA NA
#> 31: total 2022 3 2022-03 2021/2022 26 NA NA
#> 32: total 2022 3 2022-03 2021/2022 26 NA NA
#> 33: total 2022 3 2022-03 2021/2022 26 NA NA
#> 34: total 2022 3 2022-03 2021/2022 26 NA NA
#> 35: total 2022 3 2022-03 2021/2022 26 NA NA
#> 36: total 2022 3 2022-03 2021/2022 26 NA NA
#> 37: total 2022 3 2022-03 2021/2022 26 NA NA
#> 38: total 2022 3 2022-03 2021/2022 26 NA NA
#> 39: total 2022 3 2022-03 2021/2022 26 NA NA
#> 40: total 2022 3 2022-03 2021/2022 26 NA NA
#> 41: total 2022 3 2022-03 2021/2022 26 NA NA
#> 42: total 2022 3 2022-03 2021/2022 26 NA NA
#> 43: total 2022 3 2022-03 2021/2022 26 NA NA
#> 44: total 2022 3 2022-03 2021/2022 26 NA NA
#> 45: total 2022 3 2022-03 2021/2022 26 NA NA
#> sex isoyear isoweek isoyearweek season seasonweek calyear calmonth
#> calyearmonth date deaths_n
#> 1: <NA> 2022-01-23 4
#> 2: <NA> 2022-01-23 4
#> 3: <NA> 2022-01-23 8
#> 4: <NA> 2022-01-23 3
#> 5: <NA> 2022-01-23 4
#> 6: <NA> 2022-01-23 4
#> 7: <NA> 2022-01-23 7
#> 8: <NA> 2022-01-23 3
#> 9: <NA> 2022-01-23 6
#> 10: <NA> 2022-01-23 10
#> 11: <NA> 2022-01-23 5
#> 12: <NA> 2022-01-23 5
#> 13: <NA> 2022-01-23 4
#> 14: <NA> 2022-01-23 4
#> 15: <NA> 2022-01-23 6
#> 16: <NA> 2022-01-23 4
#> 17: <NA> 2022-01-23 4
#> 18: <NA> 2022-01-23 8
#> 19: <NA> 2022-01-23 3
#> 20: <NA> 2022-01-23 4
#> 21: <NA> 2022-01-23 4
#> 22: <NA> 2022-01-23 7
#> 23: <NA> 2022-01-23 3
#> 24: <NA> 2022-01-23 6
#> 25: <NA> 2022-01-23 10
#> 26: <NA> 2022-01-23 5
#> 27: <NA> 2022-01-23 5
#> 28: <NA> 2022-01-23 4
#> 29: <NA> 2022-01-23 4
#> 30: <NA> 2022-01-23 6
#> 31: <NA> 2022-01-23 4
#> 32: <NA> 2022-01-23 4
#> 33: <NA> 2022-01-23 8
#> 34: <NA> 2022-01-23 3
#> 35: <NA> 2022-01-23 4
#> 36: <NA> 2022-01-23 4
#> 37: <NA> 2022-01-23 7
#> 38: <NA> 2022-01-23 3
#> 39: <NA> 2022-01-23 6
#> 40: <NA> 2022-01-23 10
#> 41: <NA> 2022-01-23 5
#> 42: <NA> 2022-01-23 5
#> 43: <NA> 2022-01-23 4
#> 44: <NA> 2022-01-23 4
#> 45: <NA> 2022-01-23 6
#> calyearmonth date deaths_n
`csfmt_rts_data_v1` does smart assignment for time and geography.
When the variables in bold are assigned using `:=`, the listed variables will be automatically imputed.
location_code:
isoyear:
isoyearweek:
date:
d <- cstidy::generate_test_data()[1:5]
cstidy::set_csfmt_rts_data_v1(d)

# Looking at the dataset
d[]
#> granularity_time granularity_geo country_iso3 location_code border age sex
#> 1: isoyearweek county nor county_nor42 NA <NA> <NA>
#> 2: isoyearweek county nor county_nor32 NA <NA> <NA>
#> 3: isoyearweek county nor county_nor33 NA <NA> <NA>
#> 4: isoyearweek county nor county_nor56 NA <NA> <NA>
#> 5: isoyearweek county nor county_nor34 NA <NA> <NA>
#> isoyear isoweek isoyearweek season seasonweek calyear calmonth
#> 1: 2022 3 2022-03 2021/2022 26 NA NA
#> 2: 2022 3 2022-03 2021/2022 26 NA NA
#> 3: 2022 3 2022-03 2021/2022 26 NA NA
#> 4: 2022 3 2022-03 2021/2022 26 NA NA
#> 5: 2022 3 2022-03 2021/2022 26 NA NA
#> calyearmonth date deaths_n
#> 1: <NA> 2022-01-23 8
#> 2: <NA> 2022-01-23 7
#> 3: <NA> 2022-01-23 6
#> 4: <NA> 2022-01-23 2
#> 5: <NA> 2022-01-23 7
# Smart assignment of time columns (note how granularity_time, isoyear, isoyearweek, date all change)
d[1,isoyearweek := "2021-01"]
d
#> granularity_time granularity_geo country_iso3 location_code border age sex
#> 1: isoyearweek county nor county_nor42 NA <NA> <NA>
#> 2: isoyearweek county nor county_nor32 NA <NA> <NA>
#> 3: isoyearweek county nor county_nor33 NA <NA> <NA>
#> 4: isoyearweek county nor county_nor56 NA <NA> <NA>
#> 5: isoyearweek county nor county_nor34 NA <NA> <NA>
#> isoyear isoweek isoyearweek season seasonweek calyear calmonth
#> 1: 2021 1 2021-01 2020/2021 24 NA NA
#> 2: 2022 3 2022-03 2021/2022 26 NA NA
#> 3: 2022 3 2022-03 2021/2022 26 NA NA
#> 4: 2022 3 2022-03 2021/2022 26 NA NA
#> 5: 2022 3 2022-03 2021/2022 26 NA NA
#> calyearmonth date deaths_n
#> 1: <NA> 2021-01-10 8
#> 2: <NA> 2022-01-23 7
#> 3: <NA> 2022-01-23 6
#> 4: <NA> 2022-01-23 2
#> 5: <NA> 2022-01-23 7
# Smart assignment of time columns (note how granularity_time, isoyear, isoyearweek, date all change)
d[2,isoyear := 2019]
d
#> granularity_time granularity_geo country_iso3 location_code border age sex
#> 1: isoyearweek county nor county_nor42 NA <NA> <NA>
#> 2: isoyear county nor county_nor32 NA <NA> <NA>
#> 3: isoyearweek county nor county_nor33 NA <NA> <NA>
#> 4: isoyearweek county nor county_nor56 NA <NA> <NA>
#> 5: isoyearweek county nor county_nor34 NA <NA> <NA>
#> isoyear isoweek isoyearweek season seasonweek calyear calmonth
#> 1: 2021 1 2021-01 2020/2021 24 NA NA
#> 2: 2019 52 2019-52 <NA> NA NA NA
#> 3: 2022 3 2022-03 2021/2022 26 NA NA
#> 4: 2022 3 2022-03 2021/2022 26 NA NA
#> 5: 2022 3 2022-03 2021/2022 26 NA NA
#> calyearmonth date deaths_n
#> 1: <NA> 2021-01-10 8
#> 2: <NA> 2019-12-29 7
#> 3: <NA> 2022-01-23 6
#> 4: <NA> 2022-01-23 2
#> 5: <NA> 2022-01-23 7
# Smart assignment of time columns (note how granularity_time, isoyear, isoyearweek, date all change)
d[4:5,date := as.Date("2020-01-01")]
d
#> granularity_time granularity_geo country_iso3 location_code border age sex
#> 1: isoyearweek county nor county_nor42 NA <NA> <NA>
#> 2: isoyear county nor county_nor32 NA <NA> <NA>
#> 3: isoyearweek county nor county_nor33 NA <NA> <NA>
#> 4: date county nor county_nor56 NA <NA> <NA>
#> 5: date county nor county_nor34 NA <NA> <NA>
#> isoyear isoweek isoyearweek season seasonweek calyear calmonth
#> 1: 2021 1 2021-01 2020/2021 24 NA NA
#> 2: 2019 52 2019-52 <NA> NA NA NA
#> 3: 2022 3 2022-03 2021/2022 26 NA NA
#> 4: 2020 1 2020-01 2019/2020 24 2020 1
#> 5: 2020 1 2020-01 2019/2020 24 2020 1
#> calyearmonth date deaths_n
#> 1: <NA> 2021-01-10 8
#> 2: <NA> 2019-12-29 7
#> 3: <NA> 2022-01-23 6
#> 4: 2020-M01 2020-01-01 2
#> 5: 2020-M01 2020-01-01 7
# Smart assignment fails when multiple time columns are set
d[1,c("isoyear","isoyearweek") := .(2021,"2021-01")]
#> Warning in `[.csfmt_rts_data_v1`(d, 1, `:=`(c("isoyear", "isoyearweek"), :
#> Multiple time variables specified. Smart-assignment disabled.
d
#> granularity_time granularity_geo country_iso3 location_code border age sex
#> 1: isoyearweek county nor county_nor42 NA <NA> <NA>
#> 2: isoyear county nor county_nor32 NA <NA> <NA>
#> 3: isoyearweek county nor county_nor33 NA <NA> <NA>
#> 4: date county nor county_nor56 NA <NA> <NA>
#> 5: date county nor county_nor34 NA <NA> <NA>
#> isoyear isoweek isoyearweek season seasonweek calyear calmonth
#> 1: 2021 1 2021-01 2020/2021 24 NA NA
#> 2: 2019 52 2019-52 <NA> NA NA NA
#> 3: 2022 3 2022-03 2021/2022 26 NA NA
#> 4: 2020 1 2020-01 2019/2020 24 2020 1
#> 5: 2020 1 2020-01 2019/2020 24 2020 1
#> calyearmonth date deaths_n
#> 1: <NA> 2021-01-10 8
#> 2: <NA> 2019-12-29 7
#> 3: <NA> 2022-01-23 6
#> 4: 2020-M01 2020-01-01 2
#> 5: 2020-M01 2020-01-01 7
# Smart assignment of geo columns
d[1,c("location_code") := .("norge")]
d
#> granularity_time granularity_geo country_iso3 location_code border age sex
#> 1: isoyearweek nation nor norge NA <NA> <NA>
#> 2: isoyear county nor county_nor32 NA <NA> <NA>
#> 3: isoyearweek county nor county_nor33 NA <NA> <NA>
#> 4: date county nor county_nor56 NA <NA> <NA>
#> 5: date county nor county_nor34 NA <NA> <NA>
#> isoyear isoweek isoyearweek season seasonweek calyear calmonth
#> 1: 2021 1 2021-01 2020/2021 24 NA NA
#> 2: 2019 52 2019-52 <NA> NA NA NA
#> 3: 2022 3 2022-03 2021/2022 26 NA NA
#> 4: 2020 1 2020-01 2019/2020 24 2020 1
#> 5: 2020 1 2020-01 2019/2020 24 2020 1
#> calyearmonth date deaths_n
#> 1: <NA> 2021-01-10 8
#> 2: <NA> 2019-12-29 7
#> 3: <NA> 2022-01-23 6
#> 4: 2020-M01 2020-01-01 2
#> 5: 2020-M01 2020-01-01 7
# Collapsing down to different levels, and healing the dataset
# (so that it can be worked on further with regards to real time surveillance)
d[, .(deaths_n = sum(deaths_n), location_code = "norge"), keyby=.(granularity_time)] %>%
  cstidy::set_csfmt_rts_data_v1(create_unified_columns = FALSE) %>%
  print()
#> granularity_time deaths_n location_code date
#> 1: date 9 norge <NA>
#> 2: isoyear 7 norge <NA>
#> 3: isoyearweek 14 norge <NA>
# Collapsing to different levels, and removing the class csfmt_rts_data_v1 because
# it is going to be used in new output/analyses
d[, .(deaths_n = sum(deaths_n), location_code = "norge"), keyby=.(granularity_time)] %>%
  cstidy::remove_class_csfmt_rts_data() %>%
  print()
#> granularity_time deaths_n location_code
#> 1: date 9 norge
#> 2: isoyear 7 norge
#> 3: isoyearweek 14 norge
We need a way to easily summarize the data structure of a dataset.
cstidy::generate_test_data() %>%
  cstidy::set_csfmt_rts_data_v1() %>%
  summary()
#>
#> granularity_time
#> ✅ No errors
#>
#> granularity_geo
#> ✅ No errors
#>
#> country_iso3
#> ✅ No errors
#>
#> location_code
#> ✅ No errors
#>
#> border
#> ❌ Errors:
#> - NA exists (not allowed)
#>
#> age
#> ✅ No errors
#>
#> sex
#> ✅ No errors
#>
#> isoyear
#> ✅ No errors
#>
#> isoweek
#> ✅ No errors
#>
#> isoyearweek
#> ✅ No errors
#>
#> season
#> ✅ No errors
#>
#> seasonweek
#> ✅ No errors
#>
#> calyear
#> ✅ No errors
#>
#> calmonth
#> ✅ No errors
#>
#> calyearmonth
#> ✅ No errors
#>
#> date
#> ✅ No errors
#> granularity_time (character):
#> - isoyearweek (n = 45)
#> granularity_geo (character):
#> - county (n = 45)
#> country_iso3 (character):
#> - nor (n = 45)
#> location_code (character)
#> border (integer):
#> - <NA> (n = 45)
#> age (character):
#> - 000_005 (n = 15)
#> - <NA> (n = 15)
#> - total (n = 15)
#> sex (character):
#> - <NA> (n = 15)
#> - total (n = 30)
#> isoyear (integer):
#> - 2022 (n = 45)
#> isoweek (integer)
#> isoyearweek (character)
#> season (character):
#> - 2021/2022 (n = 45)
#> seasonweek (numeric)
#> calyear (integer)
#> calmonth (integer)
#> calyearmonth (character)
#> date (Date)
#> deaths_n (integer)
We need a way to easily summarize the data structure of one column inside a dataset.
cstidy::generate_test_data() %>%
  cstidy::set_csfmt_rts_data_v1() %>%
  cstidy::identify_data_structure("deaths_n") %>%
  plot()