UTILITY - DRAFT - See names and size of data sets in installed package(s) - internal utility function
Source:R/utils_datapack.R
datapack.Rd
Wrapper for data() that can also report the memory size of each data object
Arguments
- pkg
a character vector giving the package(s) to look in for data sets
- len
Only affects what is printed to console - specifies the number of characters to limit Title to, making it easier to see in the console.
- sortbysize
if TRUE (and simple = FALSE), sort by increasing size of object within each package, rather than alphabetically.
- simple
FALSE to get object sizes, etc., or TRUE to just get names in each package, like
data(package = "EJAM")$results[, c("Package", 'Item')]
Value
If simple = TRUE, data.frame with colnames Package and Item. If simple = FALSE, data.frame with colnames Package, Item, size, Title.Short
Details
Do not rely on this much - it was written as a quick utility. It may create and leave objects in the global environment; it is not careful about cleaning up after itself.
Examples
# see just a vector of the data object names
data(package = "EJAM")$results[, 'Item']
#> [1] "NAICS"
#> [2] "SIC"
#> [3] "avg.in.us"
#> [4] "bg_cenpop2020"
#> [5] "bgpts"
#> [6] "blockgroupstats"
#> [7] "censusplaces"
#> [8] "custom"
#> [9] "default_points_shown_at_startup"
#> [10] "ejamdata_version"
#> [11] "ejampackages"
#> [12] "ejscreenRESTbroker2table_na_filler"
#> [13] "epa_programs"
#> [14] "epa_programs_defined"
#> [15] "formulas_all"
#> [16] "formulas_d"
#> [17] "frsprogramcodes"
#> [18] "high_pctiles_tied_with_min"
#> [19] "islandareas"
#> [20] "lat_alias"
#> [21] "lon_alias"
#> [22] "mact_table"
#> [23] "map_headernames"
#> [24] "metadata4pins"
#> [25] "meters_per_mile"
#> [26] "modelDoaggregate"
#> [27] "modelEjamit"
#> [28] "naics_counts"
#> [29] "naicstable"
#> [30] "names_age"
#> [31] "names_age_count"
#> [32] "names_all"
#> [33] "names_all_r"
#> [34] "names_climate"
#> [35] "names_climate_avg"
#> [36] "names_climate_pctile"
#> [37] "names_climate_state_avg"
#> [38] "names_climate_state_pctile"
#> [39] "names_community"
#> [40] "names_community_count"
#> [41] "names_countabove"
#> [42] "names_criticalservice"
#> [43] "names_criticalservice_avg"
#> [44] "names_criticalservice_pctile"
#> [45] "names_criticalservice_state_avg"
#> [46] "names_criticalservice_state_pctile"
#> [47] "names_d"
#> [48] "names_d_avg"
#> [49] "names_d_count"
#> [50] "names_d_demogindexstate"
#> [51] "names_d_extra"
#> [52] "names_d_extra_count"
#> [53] "names_d_language"
#> [54] "names_d_language_count"
#> [55] "names_d_languageli"
#> [56] "names_d_languageli_count"
#> [57] "names_d_other_count"
#> [58] "names_d_pctile"
#> [59] "names_d_ratio_to_avg"
#> [60] "names_d_ratio_to_state_avg"
#> [61] "names_d_state_avg"
#> [62] "names_d_state_pctile"
#> [63] "names_d_subgroups"
#> [64] "names_d_subgroups_alone"
#> [65] "names_d_subgroups_alone_avg"
#> [66] "names_d_subgroups_alone_count"
#> [67] "names_d_subgroups_alone_pctile"
#> [68] "names_d_subgroups_alone_ratio_to_avg"
#> [69] "names_d_subgroups_alone_ratio_to_state_avg"
#> [70] "names_d_subgroups_alone_state_avg"
#> [71] "names_d_subgroups_alone_state_pctile"
#> [72] "names_d_subgroups_avg"
#> [73] "names_d_subgroups_count"
#> [74] "names_d_subgroups_nh"
#> [75] "names_d_subgroups_nh_avg"
#> [76] "names_d_subgroups_nh_count"
#> [77] "names_d_subgroups_nh_pctile"
#> [78] "names_d_subgroups_nh_ratio_to_avg"
#> [79] "names_d_subgroups_nh_ratio_to_state_avg"
#> [80] "names_d_subgroups_nh_state_avg"
#> [81] "names_d_subgroups_nh_state_pctile"
#> [82] "names_d_subgroups_pctile"
#> [83] "names_d_subgroups_ratio_to_avg"
#> [84] "names_d_subgroups_ratio_to_state_avg"
#> [85] "names_d_subgroups_state_avg"
#> [86] "names_d_subgroups_state_pctile"
#> [87] "names_e"
#> [88] "names_e_avg"
#> [89] "names_e_other"
#> [90] "names_e_pctile"
#> [91] "names_e_ratio_to_avg"
#> [92] "names_e_ratio_to_state_avg"
#> [93] "names_e_state_avg"
#> [94] "names_e_state_pctile"
#> [95] "names_ej"
#> [96] "names_ej_pctile"
#> [97] "names_ej_state"
#> [98] "names_ej_state_pctile"
#> [99] "names_ej_supp"
#> [100] "names_ej_supp_pctile"
#> [101] "names_ej_supp_state"
#> [102] "names_ej_supp_state_pctile"
#> [103] "names_featuresinarea"
#> [104] "names_flag"
#> [105] "names_geo"
#> [106] "names_health"
#> [107] "names_health_avg"
#> [108] "names_health_count"
#> [109] "names_health_pctile"
#> [110] "names_health_ratio_to_avg"
#> [111] "names_health_ratio_to_state_avg"
#> [112] "names_health_state_avg"
#> [113] "names_health_state_pctile"
#> [114] "names_misc"
#> [115] "names_pct_as_fraction_blockgroupstats"
#> [116] "names_pct_as_fraction_ejamit"
#> [117] "names_pct_as_fraction_ejscreenit"
#> [118] "names_sitesinarea"
#> [119] "names_these"
#> [120] "names_these_avg"
#> [121] "names_these_ratio_to_avg"
#> [122] "names_these_ratio_to_state_avg"
#> [123] "names_these_state_avg"
#> [124] "names_wts"
#> [125] "namez"
#> [126] "sictable"
#> [127] "stateinfo"
#> [128] "stateinfo2"
#> [129] "states_shapefile"
#> [130] "statestats"
#> [131] "testinput_address_2"
#> [132] "testinput_address_9"
#> [133] "testinput_address_parts"
#> [134] "testinput_address_table"
#> [135] "testinput_address_table_9"
#> [136] "testinput_address_table_goodnames"
#> [137] "testinput_address_table_withfull"
#> [138] "testinput_fips_blockgroups"
#> [139] "testinput_fips_cities"
#> [140] "testinput_fips_counties"
#> [141] "testinput_fips_states"
#> [142] "testinput_fips_tracts"
#> [143] "testinput_mact"
#> [144] "testinput_naics"
#> [145] "testinput_program_name"
#> [146] "testinput_program_sys_id"
#> [147] "testinput_regid"
#> [148] "testinput_registry_id"
#> [149] "testinput_shapes_2"
#> [150] "testinput_sic"
#> [151] "testinput_xtrac"
#> [152] "testoutput_doaggregate_1000pts_1miles"
#> [153] "testoutput_doaggregate_100pts_1miles"
#> [154] "testoutput_doaggregate_10pts_1miles"
#> [155] "testoutput_ejamit_1000pts_1miles"
#> [156] "testoutput_ejamit_100pts_1miles"
#> [157] "testoutput_ejamit_10pts_1miles"
#> [158] "testoutput_ejamit_fips_cities"
#> [159] "testoutput_ejamit_fips_counties"
#> [160] "testoutput_ejamit_shapes_2"
#> [161] "testoutput_ejscreenRESTbroker_1pts_1miles"
#> [162] "testoutput_ejscreenapi_1pts_1miles"
#> [163] "testoutput_ejscreenapi_plus_5"
#> [164] "testoutput_ejscreenit_5"
#> [165] "testoutput_ejscreenit_50"
#> [166] "testoutput_ejscreenit_500"
#> [167] "testoutput_getblocksnearby_1000pts_1miles"
#> [168] "testoutput_getblocksnearby_100pts_1miles"
#> [169] "testoutput_getblocksnearby_10pts_1miles"
#> [170] "testpoints_10"
#> [171] "testpoints_100"
#> [172] "testpoints_1000"
#> [173] "testpoints_10000"
#> [174] "testpoints_100_dt"
#> [175] "testpoints_5"
#> [176] "testpoints_50"
#> [177] "testpoints_500"
#> [178] "testpoints_bad"
#> [179] "testpoints_overlap3"
#> [180] "testshapes_2"
#> [181] "usastats"
#> [182] "x_anyother"
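# not actually sorted within each pkg by default
# (datapack is an internal function: unless it is on the search path, call it as EJAM:::datapack())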
datapack()
#> Error in datapack(): could not find function "datapack"
# not actually sorted by default
datapack("EJAM")$Item
#> Error in datapack("EJAM"): could not find function "datapack"
##datapack("MASS", simple=T)
# sorted by size if simple=F
##datapack("datasets", simple=F)
x <- datapack(simple = F)
#> Error in datapack(simple = F): could not find function "datapack"
# sorted by size already, to see largest ones among all these pkgs:
tail(x[, 1:3], 20)
#> Error: object 'x' not found
# sorted alphabetically within each pkg
x[order(x$Package, x$Item), 1:2]
#> Error: object 'x' not found
# sorted alphabetically across all the pkgs
x[order(x$Item), 1:2]
#> Error: object 'x' not found
# datasets as lazyloaded objects vs. files installed with package
topic = "fips" # or "shape" or "latlon" or "naics" or "address" etc.
# datasets / R objects
cbind(data.in.package = sort(grep(topic, EJAM:::datapack()$Item, value = T)))
#> Get more info with datapack(simple = FALSE)
#>
#> ignoring sortbysize because simple=TRUE
#>
#> data.in.package
#> [1,] "testinput_fips_blockgroups"
#> [2,] "testinput_fips_cities"
#> [3,] "testinput_fips_counties"
#> [4,] "testinput_fips_states"
#> [5,] "testinput_fips_tracts"
#> [6,] "testoutput_ejamit_fips_cities"
#> [7,] "testoutput_ejamit_fips_counties"
# files
cbind(files.in.package = sort(basename(testdata(topic, quiet = T))))
#> files.in.package
#> [1,] "cities_2.xlsx"
#> [2,] "counties_in_AL_detailed.xlsx"
#> [3,] "counties_in_Alabama.xlsx"
#> [4,] "counties_in_Delaware.xlsx"
#> [5,] "counties_in_Delaware_invalid.xlsx"
#> [6,] "county_10.xlsx"
#> [7,] "county_100.xlsx"
#> [8,] "county_1000.xlsx"
#> [9,] "county_state_300.xlsx"
#> [10,] "fips"
#> [11,] "state_10.xlsx"
#> [12,] "state_50.xlsx"
#> [13,] "state_county_tract_10.xlsx"
#> [14,] "tract_10.csv"
#> [15,] "tract_100.csv"
#> [16,] "tract_1000.csv"
#> [17,] "tract_state_285.xlsx"