From bab5ee30d655ba98f17aeae3d98152d6890a888d Mon Sep 17 00:00:00 2001 From: Will Beasley Date: Fri, 12 Jan 2018 11:47:02 -0600 Subject: [PATCH] stargin 97 metadata and import See #3 --- dal/flow-97.R | 52 +++ dal/import-79-metadata.R | 26 +- dal/import-97-metadata.R | 355 ++++++++++++++++++ dal/import-97-raw.R | 159 ++++++++ .../metadata/tables-97/LUExtractSource.csv | 12 + data-public/metadata/tables-97/LUGender.csv | 4 + .../metadata/tables-97/LUMarkerEvidence.csv | 9 + .../metadata/tables-97/LUMarkerType.csv | 29 ++ .../metadata/tables-97/LUMultipleBirth.csv | 6 + .../metadata/tables-97/LURaceCohort.csv | 4 + .../metadata/tables-97/LURelationshipPath.csv | 6 + .../tables-97/{lu-roster.csv => LURoster.csv} | 0 data-public/metadata/tables-97/LUTristate.csv | 4 + data-public/metadata/tables-97/LUYesNo.csv | 7 + data-public/metadata/tables-97/MzManual.csv | 209 +++++++++++ .../metadata/tables-97/RosterAssignment.csv | 51 +++ data-public/metadata/tables-97/_mapping.csv | 18 + utility/connectivity.R | 37 +- 18 files changed, 960 insertions(+), 28 deletions(-) create mode 100644 dal/flow-97.R create mode 100644 dal/import-97-metadata.R create mode 100644 dal/import-97-raw.R create mode 100644 data-public/metadata/tables-97/LUExtractSource.csv create mode 100644 data-public/metadata/tables-97/LUGender.csv create mode 100644 data-public/metadata/tables-97/LUMarkerEvidence.csv create mode 100644 data-public/metadata/tables-97/LUMarkerType.csv create mode 100644 data-public/metadata/tables-97/LUMultipleBirth.csv create mode 100644 data-public/metadata/tables-97/LURaceCohort.csv create mode 100644 data-public/metadata/tables-97/LURelationshipPath.csv rename data-public/metadata/tables-97/{lu-roster.csv => LURoster.csv} (100%) create mode 100644 data-public/metadata/tables-97/LUTristate.csv create mode 100644 data-public/metadata/tables-97/LUYesNo.csv create mode 100644 data-public/metadata/tables-97/MzManual.csv create mode 100644 
data-public/metadata/tables-97/RosterAssignment.csv
 create mode 100644 data-public/metadata/tables-97/_mapping.csv

diff --git a/dal/flow-97.R b/dal/flow-97.R
new file mode 100644
index 0000000..b6d96d9
--- /dev/null
+++ b/dal/flow-97.R
@@ -0,0 +1,52 @@
+# This is a hack of https://github.com/OuhscBbmc/miechv-3/blob/master/manipulation/osdh/osdh-flow.R
+# That file runs everything (those dozens of files) dynamically.
+# This one is hard-coded, and requires one manual stop (to run the C#).
+# But since there are so few files, I think it's an acceptable compromise.
+
+rm(list=ls(all=TRUE)) #Clear the memory for any variables set from any previous runs.
+
+# ---- load-sources ------------------------------------------------------------
+
+
+# ---- load-packages -----------------------------------------------------------
+library(magrittr)
+requireNamespace("testit")
+
+# ---- declare-globals ---------------------------------------------------------
+path_sources <- c(
+  # 97
+  "dal/import-97-metadata.R",
+  "dal/import-97-raw.R",
+  "dal/outcomes/outcomes-97.R"
+)
+
+file.exists(path_sources)
+all_sources_exist <- path_sources %>%
+  purrr::map_lgl(file.exists) %>%
+  all()
+if( !all_sources_exist ) stop("All source files to be run should exist.")
+
+
+# ---- load-data ---------------------------------------------------------------
+
+# ---- tweak-data --------------------------------------------------------------
+
+# ---- run-sources -------------------------------------------------------------
+
+message("Preparing to run\n\t", paste(path_sources, collapse="\n\t"))
+
+(start_time <- Sys.time())
+
+
+# dir.create(output="./stitched-output/dal/", recursive=T)
+knitr::stitch_rmd(script="./dal/import-97-metadata.R", output="./stitched-output/dal/import-97-metadata.md")
+knitr::stitch_rmd(script="./dal/import-97-raw.R", output="./stitched-output/dal/import-97-raw.md")
+
+stop("Now run the C# program, then come back to run the rest of the R scripts.")
+
+knitr::stitch_rmd(script="./dal/outcomes/outcomes-97.R", output="./stitched-output/dal/outcomes/outcomes-97.md") # dir.create("./stitched-output/dal/outcomes/", recursive=T)
+
+
+message("Completed flow-97 at ", Sys.time(), " (in ", round(as.numeric(difftime(Sys.time(), start_time, units="mins")), 2), " mins.)") # Elapsed time was previously read from an undefined `elapsed_duration`. NOTE(review): import-97-metadata.R and import-97-raw.R call rm(list=ls(all=TRUE)) and reassign `start_time`; confirm the value seen here is the one intended.
+
+# ---- verify-values -----------------------------------------------------------
diff --git a/dal/import-79-metadata.R b/dal/import-79-metadata.R
index 5a08d82..00d4624 100644
--- a/dal/import-79-metadata.R
+++ b/dal/import-79-metadata.R
@@ -43,15 +43,6 @@ lst_col_types <- list(
     Active = readr::col_logical(),
     Notes = readr::col_character()
   ),
-  # item_97 = readr::cols_only(
-  # ID = readr::col_integer(),
-  # Label = readr::col_character(),
-  # MinValue = readr::col_integer(),
-  # MinNonnegative = readr::col_integer(),
-  # MaxValue = readr::col_integer(),
-  # Active = readr::col_logical(),
-  # Notes = readr::col_character()
-  # ),
   LUExtractSource = col_types_minimal,
   LUMarkerEvidence = col_types_minimal,
   LUGender = col_types_minimal,
@@ -138,19 +129,6 @@ lst_col_types <- list(
     Active = readr::col_integer(),
     Notes = readr::col_character()
   )
-  # variable_97 = readr::cols_only(
-  # # ID = readr::col_integer(),
-  # VariableCode = readr::col_character(),
-  # Item = readr::col_integer(),
-  # Generation = readr::col_integer(),
-  # ExtractSource = readr::col_integer(),
-  # SurveySource = readr::col_integer(),
-  # SurveyYear = readr::col_integer(),
-  # LoopIndex = readr::col_integer(),
-  # Translate = readr::col_integer(),
-  # Active = readr::col_integer(),
-  # Notes = readr::col_character()
-  # )
 )
 
 col_types_mapping <- readr::cols_only(
@@ -199,7 +177,7 @@ ds_table
 rm(directory_in) # rm(col_types_tulsa)
 
 # ---- tweak-data --------------------------------------------------------------
-# OuhscMunge::column_rename_headstart(ds_county) #Spit out columns to help write call ato `dplyr::rename()`.
+# OuhscMunge::column_rename_headstart(ds_county) #Spit out columns to help write call to `dplyr::rename()`.
 
ds_file <- ds_file %>%
   dplyr::left_join( ds_mapping, by=c("name"="table_name")) %>%
@@ -416,4 +394,4 @@ DBI::dbDisconnect(channel); rm(channel)
 RODBC::odbcClose(channel_rodbc); rm(channel_rodbc)
 
 duration_in_seconds <- round(as.numeric(difftime(Sys.time(), start_time, units="secs")))
-cat("File completed by `", Sys.info()["user"], "` at ", strftime(Sys.time(), "%Y-%m-%d, %H:%M %z"), " in ", duration_in_seconds, " seconds.", sep="")
+cat("`import-79-metadata.R` file completed by `", Sys.info()["user"], "` at ", strftime(Sys.time(), "%Y-%m-%d, %H:%M %z"), " in ", duration_in_seconds, " seconds.", sep="")
diff --git a/dal/import-97-metadata.R b/dal/import-97-metadata.R
new file mode 100644
index 0000000..aa669f1
--- /dev/null
+++ b/dal/import-97-metadata.R
@@ -0,0 +1,355 @@
+# knitr::stitch_rmd(script="./dal/import-97-metadata.R", output="./stitched-output/dal/import-97-metadata.md") # dir.create(output="./stitched-output/dal/", recursive=T)
+rm(list=ls(all=TRUE)) #Clear the variables from previous runs.
+
+# ---- load-sources ------------------------------------------------------------
+# Call `base::source()` on any repo file that defines functions needed below. Ideally, no real operations are performed.
+base::source("utility/connectivity.R")
+
+# ---- load-packages -----------------------------------------------------------
+# Attach these package(s) so their functions don't need to be qualified: http://r-pkgs.had.co.nz/namespace.html#search-path
+library(magrittr , quietly=TRUE)
+
+# Verify these packages are available on the machine, but their functions need to be qualified: http://r-pkgs.had.co.nz/namespace.html#search-path
+requireNamespace("glue" ) #Used below to build the DELETE statements.
+requireNamespace("readr" )
+requireNamespace("tidyr" )
+requireNamespace("tibble" )
+requireNamespace("purrr" )
+requireNamespace("dplyr" ) #Avoid attaching dplyr, b/c its function names conflict with a lot of packages (esp base, stats, and plyr).
+requireNamespace("testit" ) #For asserting conditions meet expected patterns.
+requireNamespace("RODBC" ) #For communicating with SQL Server over a locally-configured DSN. Uncomment if you use 'upload-to-db' chunk.
+requireNamespace("odbc" ) #For communicating with SQL Server over a locally-configured DSN. Uncomment if you use 'upload-to-db' chunk.
+
+# ---- declare-globals ---------------------------------------------------------
+# Constant values that won't change.
+directory_in <- "data-public/metadata/tables-97"
+
+col_types_minimal <- readr::cols_only(
+  ID = readr::col_integer(),
+  Label = readr::col_character(),
+  Active = readr::col_logical(),
+  Notes = readr::col_character()
+)
+
+# The order of this list matters.
+# - Tables are WRITTEN from top to bottom.
+# - Tables are DELETED from bottom to top.
+lst_col_types <- list(
+  item = readr::cols_only(
+    ID = readr::col_integer(),
+    Label = readr::col_character(),
+    MinValue = readr::col_integer(),
+    MinNonnegative = readr::col_integer(),
+    MaxValue = readr::col_integer(),
+    Active = readr::col_logical(),
+    Notes = readr::col_character()
+  ),
+  LUExtractSource = col_types_minimal,
+  LUMarkerEvidence = col_types_minimal,
+  LUGender = col_types_minimal,
+  LUMarkerType = readr::cols_only(
+    ID = readr::col_integer(),
+    Label = readr::col_character(),
+    Explicit = readr::col_integer(),
+    Active = readr::col_logical(),
+    Notes = readr::col_character()
+  ),
+  LUMultipleBirth = col_types_minimal,
+  LURaceCohort = col_types_minimal,
+  LURelationshipPath = col_types_minimal,
+  LURosterGen1 = col_types_minimal, # NOTE(review): each name here is read from '<name>.csv'; this commit adds 'LURoster.csv' but no 'LURosterGen1.csv' -- confirm the "All metadata files must exist." assert below passes.
+  LUTristate = col_types_minimal,
+  LUYesNo = col_types_minimal,
+  MzManual = readr::cols_only(
+    ID = readr::col_integer(),
+    SubjectTag_S1 = readr::col_integer(),
+    SubjectTag_S2 = readr::col_integer(),
+    MultipleBirthIfSameSex = readr::col_integer(),
+    IsMz = readr::col_integer(),
+    Undecided = readr::col_integer(),
+    Related = readr::col_integer(),
+    Notes = readr::col_character()
+  ),
+  RosterAssignment = readr::cols_only(
+    ID = readr::col_integer(),
+    ResponseLower = readr::col_integer(),
+    ResponseUpper = readr::col_integer(),
+    Freq = readr::col_integer(),
+    Resolved = readr::col_integer(),
+    R = readr::col_double(),
+    RBoundLower = readr::col_double(),
+    RBoundUpper = readr::col_double(),
+    ShareBiodad = readr::col_integer(),
+    ShareBiomom = readr::col_integer(),
+    ShareBiograndparent = readr::col_integer(),
+    Inconsistent = readr::col_integer(),
+    Notes = readr::col_character(),
+    ResponseLowerLabel = readr::col_character(),
+    ResponseUpperLabel = readr::col_character()
+  ),
+  variable = readr::cols_only(
+    # ID = readr::col_integer(),
+    VariableCode = readr::col_character(),
+    Item = readr::col_integer(),
+    ExtractSource = readr::col_integer(),
+    SurveyYear = readr::col_integer(),
+    LoopIndex = readr::col_integer(),
+    Translate = readr::col_integer(),
+    Active = readr::col_integer(),
+    Notes = readr::col_character()
+  )
+)
+
+col_types_mapping <- readr::cols_only(
+  table_name = readr::col_character(),
+  schema_name = readr::col_character(),
+  enum_name = readr::col_character(),
+  # enum_file = readr::col_character(),
+  c_sharp_type = readr::col_character(),
+  convert_to_enum = readr::col_logical()
+)
+
+# ---- load-data ---------------------------------------------------------------
+start_time <- Sys.time()
+
+ds_mapping <- readr::read_csv(file.path(directory_in, "_mapping.csv"), col_types=col_types_mapping)
+ds_mapping
+
+
+ds_file <- lst_col_types %>%
+  tibble::enframe(value = "col_types") %>%
+  dplyr::mutate(
+    path = file.path(directory_in, paste0(name, ".csv")),
+    # col_types = purrr::map(name, function(x) lst_col_types[[x]]),
+    exists = purrr::map_lgl(path, file.exists)
+  ) %>%
+  dplyr::select(name, path, dplyr::everything())
+ds_file
+
+testit::assert("All metadata files must exist.", all(ds_file$exists))
+
+ds_entries <- ds_file %>%
+  # dplyr::slice(15) %>%
+  dplyr::select(name, path, col_types) %>%
+  dplyr::mutate(
+    entries = purrr::pmap(list(file=.$path, col_types=.$col_types), readr::read_csv, comment = "#")
+  )
+ds_entries
+
+# d <- readr::read_csv("data-public/metadata/tables/variable_97.csv", col_types=lst_col_types$variable_97, comment = "#")
+# readr::problems(d)
+# ds_entries$entries[15]
+
+ds_table <- database_inventory()
+ds_table
+
+rm(directory_in) # rm(col_types_tulsa)
+
+# ---- tweak-data --------------------------------------------------------------
+# OuhscMunge::column_rename_headstart(ds_county) #Spit out columns to help write call to `dplyr::rename()`.
+
+ds_file <- ds_file %>%
+  dplyr::left_join( ds_mapping, by=c("name"="table_name")) %>%
+  dplyr::mutate(
+    table_name = paste0("tbl", name),
+    sql_delete = glue::glue("DELETE FROM {schema_name}.{table_name};")
+    # table_name = paste0(schema_name, ".tbl", name),
+    # sql_delete = paste0("DELETE FROM ", table_name)
+  ) %>%
+  dplyr::left_join(
+    ds_entries %>%
+      dplyr::select(name, entries)
+    , by="name"
+  )
+rm(ds_entries)
+
+ds_file$entries %>%
+  purrr::walk(print)
+
+# ds_file %>%
+# dplyr::group_by(name) %>%
+# dplyr::mutate(
+# a = purrr::map_int(entries, ~max(nchar(.), na.rm=T))
+# ) %>%
+# dplyr::ungroup() %>%
+# dplyr::pull(a)
+
+
+# ds_file %>%
+# dplyr::select(name, entries) %>%
+# tibble::deframe() %>%
+# purrr::map(~max(nchar(.), na.rm=T))
+
+# lst_ds %>%
+# purrr::map(nrow)
+# lst_ds %>%
+# purrr::map(readr::spec)
+
+ds_file$table_name
+ds_file
+
+# ---- convert-to-enum ---------------------------------------------------------
+create_enum_body <- function( d ) { # Returns one string: a C# enum member line per row of `d` (reads columns ID, Label, Active, Notes).
+  tab_spaces <- " "
+  labels <- dplyr::if_else( d$Active , d$Label, paste("//", d$Label)) # Inactive entries are emitted commented-out.
+  comments <- dplyr::if_else(is.na(d$Notes ), "" , paste("//", d$Notes))
+
+  paste0(sprintf("%s%-60s = %5s, %s\n", tab_spaces, labels, d$ID, comments), collapse="")
+}
+
+# ds_file %>%
+# dplyr::filter(name=="LURelationshipPath") %>%
+# dplyr::pull(entries)
+
+ds_enum <- ds_file %>%
+  dplyr::filter(convert_to_enum) %>%
+  dplyr::select(enum_name, entries, c_sharp_type) %>%
+  dplyr::mutate(
+    enum_header = paste0("\npublic enum ", .$enum_name, " {\n"),
+    enum_body = purrr::map_chr(.$entries, create_enum_body),
+    enum_footer = "}\n",
+    enum_cs = paste0(enum_header, enum_body, enum_footer)
+  ) %>%
+  dplyr::select(-enum_header, -enum_body, -enum_footer)
+
+ds_enum %>%
+  dplyr::pull(enum_cs) %>%
+  cat()
+
+# ---- verify-values -----------------------------------------------------------
+# Sniff out problems
+
+# ---- specify-columns-to-upload -----------------------------------------------
+
+
+# ---- upload-to-db ----------------------------------------------------------
+# lst_ds %>%
+# purrr::map(function(x)paste(names(x)))
+
+ds_table_process <- ds_table %>%
+  dplyr::filter(schema_name == "Process") %>%
+  dplyr::mutate(
+    # sql_truncate = glue::glue("TRUNCATE TABLE {schema_name}.{table_name};")
+    sql_truncate = glue::glue("DELETE FROM {schema_name}.{table_name};")
+  )
+
+# Open channel
+channel <- open_dsn_channel_odbc()
+DBI::dbGetInfo(channel)
+
+channel_rodbc <- open_dsn_channel_rodbc()
+RODBC::odbcGetInfo(channel_rodbc)
+
+# Clear process tables
+delete_results_process <- ds_table_process$sql_truncate %>%
+  purrr::set_names(ds_table_process$table_name) %>%
+  rev() %>%
+  purrr::map(DBI::dbExecute, conn=channel) # DELETE returns no result set; dbExecute() reports the rows affected.
+delete_results_process
+
+# Delete metadata tables
+# delete_result <- RODBC::sqlQuery(channel, "DELETE FROM [NlsLinks].[Metadata].[tblVariable]", errors=FALSE)
+delete_results_metadata <- ds_file$sql_delete %>%
+  purrr::set_names(ds_file$table_name) %>%
+  rev() %>%
+  purrr::map(DBI::dbExecute, conn=channel) # DELETE returns no result set; dbExecute() reports the rows affected.
+
+# DBI::dbGetQuery(conn=channel, ds_file$sql_delete[15])
+delete_results_metadata
+
+# d <- ds_file %>%
+# dplyr::select(table_name, entries) %>%
+# dplyr::filter(table_name=="Enum.tblLURosterGen1") %>%
+# tibble::deframe() %>%
+# .[[1]]
+
+# d2 <- d[, 1:16]
+# RODBC::sqlSave(channel, dat=d, tablename="Enum.tblLURosterGen1", safer=TRUE, rownames=FALSE, append=TRUE)
+
+# Upload metadata tables
+purrr::pmap_int(
+  list(
+    ds_file$entries,
+    ds_file$table_name,
+    ds_file$schema_name
+  ),
+  function( d, table_name, schema_name ) {
+    # browser()
+    # DBI::dbWriteTable(
+    # conn = channel,
+    # name = table_name,
+    # schema = schema_name,
+    # value = d,
+    # append = F
+    # )
+    RODBC::sqlSave(
+      channel = channel_rodbc,
+      dat = d,
+      # tablename = table_name,
+      tablename = paste0(schema_name, ".", table_name),
+      safer = TRUE, # Only allow appends to an existing table; never delete its rows or drop it.
+      rownames = FALSE,
+      append = TRUE
+    )
+  }
+) #%>%
+# purrr::set_names(ds_file$table_name)
+# a <- ds_file$entries[[15]]
+# table(a$ID)
+
+# odbc::dbWriteTable(
+# conn = channel,
+# name = DBI::SQL("Metadata.tblvariable_97"),
+# # name = "tblvariable_97",
+# # schema = "Metadata",
+# value = ds_file$entries[[16]],
+# append = T
+# )
+
+# for( i in seq_len(nrow(ds_file)) ) {
+# message(glue::glue("Uploading from `{ basename(ds_file$path)[i]}` to `{ds_file$table_name[i]}`."))
+#
+# d <- ds_file$entries[[i]]
+# print(d)
+#
+# # RODBC::sqlQuery(channel, ds_extract$sql_truncate[i], errors=FALSE)
+#
+# # d_peek <- RODBC::sqlQuery(channel, ds_extract$sql_select[i], errors=FALSE)
+# #
+# # missing_in_extract <- setdiff(colnames(d_peek), colnames(d))
+# # missing_in_database <- setdiff(colnames(d), colnames(d_peek))
+# #
+# # d_column <- tibble::tibble(
+# # db = colnames(d),
+# # extract = colnames(d_peek)
+# # ) %>%
+# # dplyr::filter(db != extract)
+# #
+# # RODBC::sqlSave(
+# # channel = channel,
+# # dat = d,
+# # tablename = ds_extract$table_name[i],
+# # safer = TRUE, # Don't keep the existing table.
+# # rownames = FALSE,
+# # append = TRUE
+# # ) %>%
+# # print()
+#
+# OuhscMunge::upload_sqls_rodbc(
+# d = d,
+# table_name = ds_file$table_name[i] ,
+# dsn_name = "local-nlsy-links",
+# clear_table = T,
+# create_table = F
+# )
+#
+#
+# message(glue::glue("{format(object.size(d), units='MB')}"))
+# }
+
+# Close channel
+DBI::dbDisconnect(channel); rm(channel)
+RODBC::odbcClose(channel_rodbc); rm(channel_rodbc)
+
+duration_in_seconds <- round(as.numeric(difftime(Sys.time(), start_time, units="secs")))
+cat("`import-97-metadata.R` file completed by `", Sys.info()["user"], "` at ", strftime(Sys.time(), "%Y-%m-%d, %H:%M %z"), " in ", duration_in_seconds, " seconds.", sep="")
diff --git a/dal/import-97-raw.R b/dal/import-97-raw.R
new file mode 100644
index 0000000..3e2af44
--- /dev/null
+++ b/dal/import-97-raw.R
@@ -0,0 +1,159 @@
+# knitr::stitch_rmd(script="./dal/import-97-raw.R", output="./stitched-output/dal/import-97-raw.md") # dir.create(output="./stitched-output/dal/", recursive=T)
+rm(list=ls(all=TRUE)) #Clear the variables from previous runs.
+
+# ---- load-sources ------------------------------------------------------------
+# Call `base::source()` on any repo file that defines functions needed below. Ideally, no real operations are performed.
+base::source("utility/connectivity.R")
+
+# ---- load-packages -----------------------------------------------------------
+# Attach these package(s) so their functions don't need to be qualified: http://r-pkgs.had.co.nz/namespace.html#search-path
+library(magrittr , quietly=TRUE)
+
+# Verify these packages are available on the machine, but their functions need to be qualified: http://r-pkgs.had.co.nz/namespace.html#search-path
+requireNamespace("glue" )
+requireNamespace("readr" )
+requireNamespace("tidyr" )
+requireNamespace("tibble" )
+requireNamespace("purrr" )
+requireNamespace("dplyr" ) #Avoid attaching dplyr, b/c its function names conflict with a lot of packages (esp base, stats, and plyr).
+requireNamespace("testit" ) #For asserting conditions meet expected patterns.
+requireNamespace("RODBC" ) #For communicating with SQL Server over a locally-configured DSN. Uncomment if you use 'upload-to-db' chunk.
+requireNamespace("odbc" ) #For communicating with SQL Server over a locally-configured DSN. Uncomment if you use 'upload-to-db' chunk.
+requireNamespace("checkmate" ) #For asserting the extract table below is well-formed.
+
+# ---- declare-globals ---------------------------------------------------------
+# Constant values that won't change.
+directory_in <- "data-unshared/raw"
+columns_to_drop <- c("A0002600", "Y2267000")
+
+ds_extract <- tibble::tribble(
+  ~table_name , ~file_name
+  ,"Extract.tblGen1Explicit" , "nlsy79-gen1/Gen1Explicit.csv"
+  ,"Extract.tblGen1Implicit" , "nlsy79-gen1/Gen1Implicit.csv"
+  ,"Extract.tblGen1Links" , "nlsy79-gen1/Gen1Links.csv"
+  ,"Extract.tblGen1Outcomes" , "nlsy79-gen1/Gen1Outcomes.csv"
+  ,"Extract.tblGen1GeocodeSanitized" , "nlsy79-gen1/Gen1GeocodeSanitized.csv"
+  # # "Process.tblLURosterGen1" , "nlsy79-gen1/RosterGen1.csv"
+  # # tblGen1MzDzDistinction2010
+  # #
+  ,"Extract.tblGen2FatherFromGen1" , "nlsy79-gen2/Gen2FatherFromGen1.csv"
+  ,"Extract.tblGen2ImplicitFather" , "nlsy79-gen2/Gen2ImplicitFather.csv"
+  ,"Extract.tblGen2Links" , "nlsy79-gen2/Gen2Links.csv"
+  ,"Extract.tblGen2LinksFromGen1" , "nlsy79-gen2/Gen2LinksFromGen1.csv"
+  ,"Extract.tblGen2OutcomesHeight" , "nlsy79-gen2/Gen2OutcomesHeight.csv"
+  ,"Extract.tblGen2OutcomesMath" , "nlsy79-gen2/Gen2OutcomesMath.csv"
+  ,"Extract.tblGen2OutcomesWeight" , "nlsy79-gen2/Gen2OutcomesWeight.csv"
+
+  # "Extract.tbl97Roster" , "nlsy97/97-roster.csv"
+)
+
+col_types_default <- readr::cols(
+  .default = readr::col_integer()
+)
+
+checkmate::assert_character(ds_extract$table_name , min.chars=10, any.missing=FALSE, unique=TRUE)
+checkmate::assert_character(ds_extract$file_name , min.chars=10, any.missing=FALSE, unique=TRUE)
+
+# ---- load-data ---------------------------------------------------------------
+start_time <- Sys.time()
+
+ds_extract <- ds_extract %>%
+  dplyr::mutate(
+    path = file.path(directory_in, file_name),
+    extract_exist = file.exists(path),
+    sql_select = glue::glue("SELECT TOP(100) * FROM {table_name}"),
+    sql_truncate = glue::glue("TRUNCATE TABLE {table_name}")
+  )
+testit::assert("All files should be found.", all(ds_extract$extract_exist))
+
+print(ds_extract, n=20)
+
+# ---- tweak-data --------------------------------------------------------------
+
+# ---- verify-values -----------------------------------------------------------
+# Sniff out problems
+
+# ---- specify-columns-to-upload -----------------------------------------------
+
+# ---- upload-to-db ----------------------------------------------------------
+
+channel_odbc <- open_dsn_channel_odbc()
+DBI::dbGetInfo(channel_odbc)
+
+channel_rodbc <- open_dsn_channel_rodbc()
+
+for( i in seq_len(nrow(ds_extract)) ) { # i <- 1L
+  message(glue::glue("Uploading from `{ds_extract$file_name[i]}` to `{ds_extract$table_name[i]}`."))
+
+  d <- readr::read_csv(ds_extract$path[i], col_types=col_types_default)
+
+  columns_to_drop_specific <- colnames(d) %>%
+    intersect(columns_to_drop)
+  # %>%
+  # glue::glue("{.}")
+
+  if( length(columns_to_drop_specific) >= 1L ) {
+    d <- d %>%
+      dplyr::select(-dplyr::one_of(columns_to_drop_specific)) # Replaces the deprecated `dplyr::select_(.dots=...)`.
+  }
+
+  # print(dim(d))
+  # purrr::map_chr(d, class)
+  print(d, n=20)
+
+  #RODBC::sqlQuery(channel_odbc, ds_extract$sql_truncate[i], errors=FALSE)
+  # d_peek <- RODBC::sqlQuery(channel_odbc, ds_extract$sql_select[i], errors=FALSE)
+
+  DBI::dbExecute(channel_odbc, ds_extract$sql_truncate[i]) # TRUNCATE returns no result set, so use dbExecute() rather than dbGetQuery().
+
+  d_peek <- DBI::dbGetQuery(channel_odbc, ds_extract$sql_select[i])
+  peek <- colnames(d_peek)
+  # peek <- DBI::dbListFields(channel_odbc, ds_extract$table_name[i])
+
+  missing_in_extract <- setdiff(peek , colnames(d))
+  missing_in_database <- setdiff(colnames(d), peek ) # NOTE(review): both `missing_in_*` vectors are computed but never used below -- assert they are empty before uploading?
+
+  # d_column <- tibble::tibble(
+  # db = colnames(d),
+  # extract = peek
+  # ) %>%
+  # dplyr::filter(db != extract)
+
+  # system.time({
+  # DBI::dbWriteTable(
+  # conn = channel_odbc,
+  # name = DBI::SQL(ds_extract$table_name[i]),
+  # value = d, #[, 1:10],
+  # # append = T,
+  # overwrite = T
+  # )
+  # })
+
+  system.time({
+    RODBC::sqlSave(
+      channel = channel_rodbc,
+      dat = d,
+      tablename = ds_extract$table_name[i],
+      safer = TRUE, # Only allow appends to an existing table; never delete its rows or drop it.
+      rownames = FALSE,
+      append = TRUE
+    ) %>%
+      print()
+  })
+
+  # OuhscMunge::upload_sqls_rodbc(
+  # d = d[1:100, ],
+  # table_name = ds_extract$table_name[i] ,
+  # dsn_name = "local-nlsy-links-79",
+  # clear_table = F,
+  # create_table = T
+  # )
+
+
+  message(glue::glue("Tibble size: {format(object.size(d), units='MB')}"))
+}
+DBI::dbDisconnect(channel_odbc); rm(channel_odbc)
+RODBC::odbcClose(channel_rodbc); rm(channel_rodbc)
+
+duration_in_seconds <- round(as.numeric(difftime(Sys.time(), start_time, units="secs")))
+cat("`import-97-raw.R` file completed by `", Sys.info()["user"], "` at ", strftime(Sys.time(), "%Y-%m-%d, %H:%M %z"), " in ", duration_in_seconds, " seconds.", sep="")
diff --git a/data-public/metadata/tables-97/LUExtractSource.csv b/data-public/metadata/tables-97/LUExtractSource.csv
new file mode 100644
index 0000000..e22de9c
--- /dev/null
+++ b/data-public/metadata/tables-97/LUExtractSource.csv
@@ -0,0 +1,12 @@
+ID,Label,Active,Notes
+3,Gen1Links ,TRUE,
+4,Gen2Links ,TRUE,
+5,Gen2LinksFromGen1 ,TRUE,
+6,Gen2ImplicitFather ,TRUE,
+7,Gen2FatherFromGen1 ,TRUE,
+8,Gen1Outcomes ,TRUE,
+9,Gen2OutcomesHeight ,TRUE,
+10,Gen1Explicit ,TRUE,
+11,Gen1Implicit ,TRUE,
+12,Gen2OutcomesWeight ,TRUE,
+13,Gen2OutcomesMath ,TRUE,
diff --git a/data-public/metadata/tables-97/LUGender.csv b/data-public/metadata/tables-97/LUGender.csv
new file mode 100644
index 0000000..c2f4139
--- /dev/null
+++ b/data-public/metadata/tables-97/LUGender.csv
@@ -0,0 +1,4 @@
+ID,Label,Active,Notes
+1,Male,TRUE,
+2,Female,TRUE,
+255,InvalidSkipGen2,TRUE,
diff --git a/data-public/metadata/tables-97/LUMarkerEvidence.csv b/data-public/metadata/tables-97/LUMarkerEvidence.csv
new file mode 100644
index
0000000..8a67717 --- /dev/null +++ b/data-public/metadata/tables-97/LUMarkerEvidence.csv @@ -0,0 +1,9 @@ +ID,Label,Active,Notes +0,Irrelevant ,TRUE, +1,StronglySupports ,TRUE, +2,Supports ,TRUE, +3,Consistent ,TRUE, +4,Ambiguous ,TRUE, +5,Missing ,TRUE, +6,Unlikely ,TRUE, +7,Disconfirms ,TRUE, diff --git a/data-public/metadata/tables-97/LUMarkerType.csv b/data-public/metadata/tables-97/LUMarkerType.csv new file mode 100644 index 0000000..37f0221 --- /dev/null +++ b/data-public/metadata/tables-97/LUMarkerType.csv @@ -0,0 +1,29 @@ +ID,Label,Explicit,Active,Notes +1,RosterGen1 ,1,TRUE, +2,ShareBiomom ,1,TRUE, +3,ShareBiodad ,1,TRUE, +5,DobSeparation ,0,TRUE, +6,GenderAgreement ,0,TRUE, +10,FatherAsthma ,0,TRUE, +11,BabyDaddyAsthma ,0,TRUE, +12,BabyDaddyLeftHHDate ,0,TRUE, +13,BabyDaddyDeathDate ,0,TRUE, +14,BabyDaddyAlive ,0,TRUE, +15,BabyDaddyInHH ,0,TRUE, +16,BabyDaddyDistanceFromHH ,0,TRUE, +17,Gen2CFatherAlive ,0,TRUE, +18,Gen2CFatherInHH ,0,TRUE, +19,Gen2CFatherDistanceFromHH ,0,TRUE, +30,Gen1BiodadInHH ,0,TRUE, +31,Gen1BiodadDeathAge ,0,TRUE, +32,Gen1BiodadBirthYear ,0,TRUE, +33,Gen1BiodadInHH1979 ,0,FALSE, +34,Gen1BiodadBirthCountry ,0,TRUE, +35,Gen1BiodadBirthState ,0,TRUE, +40,Gen1BiomomInHH ,0,TRUE, +41,Gen1BiomomDeathAge ,0,TRUE, +42,Gen1BiomomBirthYear ,0,TRUE, +43,Gen1BiomomInHH1979 ,0,FALSE, +44,Gen1BiomomBirthCountry ,0,TRUE, +45,Gen1BiomomBirthState ,0,TRUE, +50,Gen1AlwaysLivedWithBothBioparents ,0,TRUE, diff --git a/data-public/metadata/tables-97/LUMultipleBirth.csv b/data-public/metadata/tables-97/LUMultipleBirth.csv new file mode 100644 index 0000000..e5b2777 --- /dev/null +++ b/data-public/metadata/tables-97/LUMultipleBirth.csv @@ -0,0 +1,6 @@ +ID,Label,Active,Notes +0,No,TRUE, +2,Twin,TRUE, +3,Trip,TRUE, +4,TwinOrTrip,TRUE,Currently Then Gen1 algorithm doesn't distinguish. 
+255,DoNotKnow,TRUE, diff --git a/data-public/metadata/tables-97/LURaceCohort.csv b/data-public/metadata/tables-97/LURaceCohort.csv new file mode 100644 index 0000000..24dbd98 --- /dev/null +++ b/data-public/metadata/tables-97/LURaceCohort.csv @@ -0,0 +1,4 @@ +ID,Label,Active,Notes +1,Hispanic,TRUE, +2,Black,TRUE, +3,Nbnh,TRUE, diff --git a/data-public/metadata/tables-97/LURelationshipPath.csv b/data-public/metadata/tables-97/LURelationshipPath.csv new file mode 100644 index 0000000..f6c1efc --- /dev/null +++ b/data-public/metadata/tables-97/LURelationshipPath.csv @@ -0,0 +1,6 @@ +ID,Label,Active,Notes +1,Gen1Housemates ,TRUE, +2,Gen2Siblings ,TRUE, +3,Gen2Cousins ,TRUE, +4,ParentChild ,TRUE, +5,AuntNiece ,TRUE,Actually (Uncle|Aunt)-(Nephew|Niece) diff --git a/data-public/metadata/tables-97/lu-roster.csv b/data-public/metadata/tables-97/LURoster.csv similarity index 100% rename from data-public/metadata/tables-97/lu-roster.csv rename to data-public/metadata/tables-97/LURoster.csv diff --git a/data-public/metadata/tables-97/LUTristate.csv b/data-public/metadata/tables-97/LUTristate.csv new file mode 100644 index 0000000..1117ba7 --- /dev/null +++ b/data-public/metadata/tables-97/LUTristate.csv @@ -0,0 +1,4 @@ +ID,Label,Active,Notes +0,No,TRUE, +1,Yes,TRUE, +255,DoNotKnow,TRUE, diff --git a/data-public/metadata/tables-97/LUYesNo.csv b/data-public/metadata/tables-97/LUYesNo.csv new file mode 100644 index 0000000..5d38a50 --- /dev/null +++ b/data-public/metadata/tables-97/LUYesNo.csv @@ -0,0 +1,7 @@ +ID,Label,Active,Notes +-6,ValidSkipOrNoInterviewOrNotInSurvey,TRUE, +-3,InvalidSkip,TRUE, +-2,DoNotKnow,TRUE, +-1,Refusal,TRUE, +0,No,TRUE, +1,Yes,TRUE, diff --git a/data-public/metadata/tables-97/MzManual.csv b/data-public/metadata/tables-97/MzManual.csv new file mode 100644 index 0000000..7d4bdc5 --- /dev/null +++ b/data-public/metadata/tables-97/MzManual.csv @@ -0,0 +1,209 @@ 
+ID,Sample,SubjectTag_S1,SubjectTag_S2,Generation,MultipleBirthIfSameSex,IsMz,Undecided,Related,Notes +1,79,5003,5004,2,2,0,0,1,Very Consistent +3,79,14303,14304,2,2,0,0,1,Different Gender +5,79,15904,15905,2,2,0,0,1,NA +6,79,28805,28806,2,2,0,0,1,Different Gender +8,79,36504,36505,2,2,1,0,1,Twice DZ then once MZ +9,79,67703,67704,2,2,0,0,1,1994-2008 +10,79,73301,73302,2,2,1,0,1,Mostly consistent 1994-2008 +12,79,74301,74302,2,2,0,0,1,Different Gender +13,79,77502,77503,2,2,1,0,1,1994-2006 +14,79,93001,93002,2,2,1,0,1,1994-2000 +15,79,104902,104903,2,2,0,0,1,2000-2008 +16,79,121005,121006,2,2,1,0,1,1994-2008 +17,79,125403,125404,2,2,0,0,1,1994-1998 +18,79,146001,146002,2,2,255,1,1,Last mother response was 1993 +19,79,159605,159606,2,2,0,0,1,1994-2008 +20,79,167101,167102,2,2,0,0,1,Different Gender +21,79,188601,188602,2,2,0,0,1,1994-2002 +22,79,190503,190504,2,2,1,0,1,2000-2008 +23,79,190602,190603,2,3,0,0,1,Different Gender +24,79,190602,190604,2,3,0,0,1,Different Gender +25,79,190603,190604,2,3,0,0,1,Mom reports trips are DZ 2002-2008 +26,79,193701,193702,2,2,0,0,1,Different Gender +27,79,201202,201203,2,2,1,0,1,1994-2008 +28,79,207102,207103,2,2,0,0,1,Different Gender +29,79,215302,215303,2,2,0,0,1,1994-2008 +30,79,217403,217404,2,2,0,0,1,1998-2008 +31,79,229902,229903,2,2,1,0,1,1994-2006 +32,79,233901,233902,2,2,0,0,1,Mostly consistent 1996-2006 +33,79,244402,244403,2,2,0,0,1,1994-2008 +34,79,272301,272302,2,2,0,0,1,Different Gender +35,79,277002,277003,2,2,0,0,1,Different Gender +36,79,279001,279002,2,2,0,0,1,Different Gender +37,79,288601,288602,2,2,0,0,1,1994-2008 +38,79,315001,315002,2,2,0,0,1,Different Gender +39,79,315101,315102,2,2,1,0,1,1994-2008 +40,79,338701,338702,2,2,0,0,1,Different Gender +41,79,342301,342302,2,2,0,0,1,1994-2008 +42,79,342901,342902,2,2,1,0,1,1994-2004 +43,79,345802,345803,2,2,0,0,1,1994-2008 +44,79,350602,350603,2,2,0,0,1,1994-2006 +45,79,352702,352703,2,2,0,0,1,Different Gender +46,79,354405,354406,2,2,0,0,1,"Different Gender 
(Watch out, mom had two sets of DZ twins)" +47,79,354407,354408,2,2,0,0,1,"1994-2006 (Watch out, mom had two sets of DZ twins)" +48,79,360501,360502,2,2,0,0,1,Different Gender +49,79,365401,365402,2,2,0,0,1,1994-2008 +50,79,368801,368802,2,2,1,0,1,Response on in 1998 +51,79,372904,372905,2,2,0,0,1,Different Gender +52,79,378702,378703,2,2,0,0,1,1998-2008 +53,79,392401,392402,2,2,1,0,1,MZ 2000-2004; DZ 1998 +54,79,393101,393102,2,2,0,0,1,"1994-2008 (Watch out, mom had two sets of DZ twins)" +55,79,393103,393104,2,2,0,0,1,"Different Gender (Watch out, mom had two sets of DZ twins)" +56,79,407401,407402,2,3,0,0,1,Mostly DZ 1994-2008 +57,79,407401,407403,2,3,0,0,1,Different Gender +58,79,407402,407403,2,3,0,0,1,Different Gender +59,79,448306,448307,2,2,1,0,1,1994-2008 +60,79,468302,468303,2,2,0,0,1,Different Gender +61,79,473402,473403,2,2,0,0,1,1994-2008 +62,79,484202,484203,2,2,0,0,1,1994-2008 +63,79,487103,487104,2,2,1,0,1,1998-2008 +65,79,499002,499003,2,2,0,0,1,1998-2008 +66,79,505201,505202,2,2,0,0,1,1998-2004 +67,79,509801,509802,2,2,0,0,1,Different Gender +68,79,560501,560502,2,2,1,0,1,1994-2008 +69,79,565901,565902,2,2,255,1,1,Last mother response was 1993 +70,79,568502,568503,2,2,0,0,1,Different Gender +71,79,584003,584004,2,2,0,0,1,Different Gender +72,79,597305,597306,2,2,0,0,1,Different Gender +73,79,604604,604605,2,2,1,0,1,1994-2006 +74,79,627002,627003,2,2,0,0,1,1996-2008 +75,79,628301,628302,2,2,0,0,1,Different Gender +76,79,635302,635303,2,2,0,0,1,Mostly DZ 1996-2008 +77,79,642902,642903,2,2,1,0,1,Mostly MZ 1994-2004 +78,79,651603,651604,2,2,0,0,1,Different Gender +79,79,661301,661302,2,2,0,0,1,Different Gender +80,79,661402,661403,2,2,0,0,1,Different Gender +81,79,663901,663902,2,2,255,1,1,Last mother response was 1990 +82,79,685401,685402,2,2,1,0,1,1994-2008 +83,79,706502,706503,2,2,1,0,1,MZ 1996-2008; DZ 1994 +84,79,711101,711102,2,2,255,1,1,Last mother response was 1990 +85,79,716902,716903,2,2,0,0,1,Different Gender 
+86,79,724903,724904,2,2,0,0,1,2000-2004 +87,79,744804,744805,2,2,0,0,1,Mostly DZ 1994-2008 (MZ in 2000) +88,79,750404,750405,2,2,0,0,1,1996-2008 +89,79,760001,760002,2,2,0,0,1,2000-2006 +90,79,767302,767303,2,2,0,0,1,1996-2006 +91,79,780403,780404,2,2,0,0,1,"MZ 1994,2000-2006; DZ 1996-1998" +92,79,783103,783104,2,2,0,0,1,Different Gender +93,79,791406,791407,2,2,255,1,1,Last mother response was 1990 +94,79,795803,795804,2,2,0,0,1,Different Gender +95,79,803105,803106,2,2,0,0,1,Different Gender +96,79,817405,817406,2,2,0,0,1,"DZ 1994, 2000, 2006, 2008; MZ 2002" +97,79,826402,826403,2,2,0,0,1,Different Gender +98,79,827304,827305,2,2,0,0,1,Different Gender +99,79,835904,835905,2,2,0,0,1,1994-2006 +100,79,853001,853002,2,2,0,0,1,Different Gender +101,79,859804,859805,2,2,0,0,1,1994-2008 +102,79,864902,864903,2,2,255,1,1,Mother avoids the twin items +103,79,886902,886903,2,2,1,0,1,1994-2004 +104,79,896703,896704,2,2,1,0,1,"MZ 1994, 2000, 2008; DZ 2006" +105,79,918302,918303,2,2,0,0,1,Different Gender +106,79,923302,923303,2,2,0,0,1,1994-2008 +107,79,930806,930807,2,2,0,0,1,"2004, 2008" +108,79,953601,953602,2,2,1,0,1,1994-2006 +109,79,957602,957603,2,2,1,0,1,2004-2008 +110,79,959601,959602,2,2,255,1,1,Last mother response was 1990 +111,79,963202,963203,2,2,0,0,1,1994-1998 +112,79,964101,964102,2,2,0,0,1,2000-2008 +113,79,991301,991302,2,2,0,0,1,1994-2004 +115,79,1001201,1001202,2,2,255,1,1,Last mother response was 1990 +116,79,1015601,1015602,2,2,0,0,1,Different Gender +117,79,1019102,1019103,2,2,1,1,1,"mixed up answers (Watch out, mom had two sets of twins, but typically only one response)" +118,79,1019104,1019105,2,2,0,0,1,"Different Gender (Watch out, mom had two sets of twins, but typically only one response)" +119,79,1021103,1021104,2,2,0,0,1,Different Gender +120,79,1040101,1040102,2,2,0,0,1,Different Gender +121,79,1048403,1048404,2,2,0,0,1,Different Gender +122,79,1058202,1058203,2,2,0,0,1,1994-2008 +123,79,1063402,1063403,2,2,1,0,1,1994-2008
+124,79,1115501,1115502,2,2,0,0,1,Different Gender +125,79,1119103,1119104,2,2,255,1,1,Last mother response was 1984 +126,79,1148601,1148602,2,2,255,1,1,Last mother response was 1984 +127,79,1173301,1173302,2,2,255,1,1,Last mother response was 1984 +128,79,1173901,1173902,2,2,255,1,1,Last mother response was 1984 +129,79,1179802,1179803,2,2,1,0,1,1994-2006 +130,79,1185904,1185905,2,2,0,0,1,1996-2006 +131,79,1206601,1206602,2,2,1,0,1,1994-2008 +132,79,1211303,1211304,2,2,0,0,1,Different Gender +133,79,1214601,1214602,2,2,0,0,1,"DZ 1994, 1998, 2006, 2008; MZ 2004" +134,79,1255403,1255404,2,2,0,0,1,Different Gender +135,79,1257402,1257403,2,2,255,1,1,Last mother response was 1984 +136,79,890202,890203,2,2,0,0,1,Different Gender +141,79,30000,30100,1,0,0,0,1,Missing in 1994; dob 4/15/1961 and 5/15/1961; Both say cousins in roster; neither acknowledge each other in 2006 biomom/dad items +142,79,54000,54100,1,2,1,0,1,Both say MZ in 1994; Both say brothers in roster; both say share biomom & biodad +143,79,88600,88700,1,2,0,0,1,88600 says not twins in 1994; 88700 says DZ in 1994; both say sisters in roster; both say share biomom & biodad +145,79,98300,98400,1,0,0,0,0,"No twin listed in 1994; 98300 invalidly skipped roster, 98400 said OtherNonRelative; born in 1958-Dec and 1958-Nov; 98300 hasn't responded since 1984" +146,79,98300,98500,1,0,0,0,0,No twin listed in 1994; Both invalidly skipped roster; didn't mention any siblings in 2006 biomom/dad items +147,79,98400,98500,1,0,0,0,0,No twin listed in 1994; Both invalidly skipped roster; didn't mention any siblings in 2006 biomom/dad items +149,79,103900,104000,1,0,0,0,0,"No twin listed in 1994; Both invalidly skipped roster; born Jan 1960 and Dec 1959; 103900 doesn't acknowledge in 2006 biomom/dad items, 104000 hasn't responded since 2002" +152,79,110300,110400,1,2,255,1,1,No twin listed in 1994; Both say sisters in roster; 110300 says share biomom&biodad; 110400 didn't answer 2006 & 2008 survey 
+154,79,117800,117900,1,2,255,1,1,No twin listed in 1994; Both say sisters in roster; both say share biomom & biodad +155,79,157600,157700,1,2,1,0,1,"157700 says MZ in 1994, No twin listed for 157600; both say brothers in roster; 157700 says share biomom & biodad; 157600 didn't answer 2006 survey" +156,79,163500,163600,1,2,255,1,1,No twin listed in 1994; Both say brothers in roster; both say share biomom & biodad +158,79,233500,233600,1,0,0,0,NA,No twin listed in 1994; both say sisters in roster; didn't acknowledge each other in 2006 biomom/biodad roster; born May 1957 and April 1957 +160,79,266900,267000,1,2,0,0,1,Both say DZ in 1994; both say sisters in roster; both say share biomom & biodad +161,79,282400,282500,1,2,0,0,1,"282400 says DZ in 1994, 282500 didn't list twins; both say sisters in roster; neither have responded since 1998 & 2000" +162,79,284600,284700,1,2,1,0,1,"284700 says MZ in 1994, 284600 didn't list twins; both say sisters in roster; 284700 says shares biomom & biodad, 284600 hasn't responded since 2000" +163,79,296700,296800,1,0,0,0,1,"No twin listed in 1994; both say sisters in roster; 296800 says share biom & biodad in 2006, 296700 hasn't responded since 1998; born Dec 1964 & Nov 1964" +164,79,300900,301000,1,2,0,0,1,"301000 say they're DZ trips (but only two sibs listed), 300900 didn't list twins;both say sisters in roster; 300900 says share biomom & biodad in 2006, 301000 didn't response to explicit items" +165,79,342800,342900,1,2,1,0,1,Both say MZ in 1994; both say sisters in roster; both say share biomom & biodad +167,79,345700,345800,1,2,1,0,1,Both say MZ in 1994; both say sisters in roster; both say share biomom & biodad +169,79,347300,347400,1,2,1,0,1,"Both say MZ in 1994; both say brothers in roster; 347300 says shares biomom & biodad in 2006, 347400 hasn't responded since 2002" +170,79,365700,365800,1,2,0,0,1,Both say DZ in 1994; both say brothers in roster; both say share biomom & biodad +171,79,410300,410500,1,2,0,0,1,"410500 says 
DZ in 1994, 410400 didn't list twin; both say brothers on roster; both says share biomom & biodad; (they're inconsistent about biomom/dad with their younger sister, 410400)" +173,79,422400,1205400,1,2,0,0,0,No twin listed in 1994; both invalidly skipped roster; neither acknowledge each other in biomom/dad items; born Apr 1957 & Mar 1957; ID numbering is weird too. Geocode variables say they were born 14 days apart. +175,79,460500,460700,1,0,0,0,NA,"No twin listed in 1994; Both say cousins in roster; 460700 doesn't acknowledge in biomom/dad items, 460500 hasn't responded since 2004" +176,79,462500,462700,1,0,0,0,NA,No twin listed in 1994; Both say cousins in roster; neither acknowledge each other in biomom/dad items; born Sept 1957 & Aug 1957 +178,79,468200,468300,1,0,0,0,NA,No twin listed in 1994; Both say cousins in roster; neither acknowledge each other in biomom/dad items; born July 1957 & Aug 1957 +179,79,483000,483100,1,2,1,0,1,Both say MZ in 1994; both say brothers in roster; both say share biomom & biodad +180,79,496600,496700,1,2,1,0,1,Both say MZ in 1994; both say sisters in roster; both say share biomom & biodad +182,79,498800,498900,1,2,0,0,1,Both say DZ in 1994; both say sisters in roster; both say share biomom & biodad +183,79,541300,541400,1,2,255,1,1,"No twins listed in 1994 (didn't response to 1994 survey); both say brothers in roster; neither acknowledge each other in biomom/dad items (541300 answered 2006 survey); completed 2008 and 2010, respectively" +185,79,557000,557100,1,2,255,1,NA,"No twins listed in 1994; both say sisters in roster; neither acknowledge each other in biomom/dad items; haven't responded since 1998 and 1993, respectively" +186,79,562100,562200,1,2,0,0,1,"Both say DZ in 1994; both say brothers in roster; both say share biomom & biodad; 562100 says share biomom & biodad, " +188,79,568600,568700,1,2,0,0,1,Both say DZ in 1994; both say sisters in roster; both say share biomom & biodad +189,79,572000,572100,1,2,255,1,NA,No twins 
listed in 1994 (572100 didn't respond to 1994 or 2006 survey); both say sisters in roster; neither acknowledge each other in biomom/dad items (572100 answered 2006 survey) +190,79,578500,578600,1,2,0,0,1,578600 says DZ in 1994 (578500 didn't respond to 1994 or 2006 survey); both say brothers in roster; 578600 hasn't responded since 2004 +191,79,602000,602100,1,2,1,0,1,"602100 says MZ in 1994, 602000 didn't have a twin listed; both say brothers in roster; both say share biomom & biodad" +193,79,604200,604300,1,2,1,0,1,"604300 says MZ in 1994, 604200 didn't have a twin listed; both say sisters in roster; both say share biomom & biodad" +194,79,625800,625900,1,0,0,0,NA,No twins listed in 1994; both say cousins in roster; neither acknowledge each other in biomom/dad items +195,79,656000,656100,1,0,0,1,NA,"Both invalidly skipped roster; born Feb and Jan 1957; last surveys were 1990, so no twin or biomom/dad items" +196,79,656400,656700,1,0,0,0,0,"Both say OtherNonRelative in roster; born July and June 1957; last surveys were 1990, so no twin or biomom/dad items" +202,79,668300,668400,1,2,0,0,1,"668400 says DZ in 1994, and share biomom & biodad; both says sisters in roster; 668300 hasn't responded since 1989, so no twin or biomom/dad" +203,79,706900,707000,1,2,0,0,1,"706900 says DZ in 1994, no twin listed for 707000; both say brothers in roster; both say share biomom & biodad" +204,79,717800,718000,1,0,0,1,NA,No twin listed in 1994; both say sisters in roster; neither acknowledged in biomom/dad items; born in April and May 1959; ID numbering skips too +205,79,728200,728300,1,2,0,0,1,Both say DZ in 1994; both say sisters in roster; both say share biomom & biodad +206,79,733500,733600,1,0,0,1,1,"No twin listed in 1994; both say brothers in roster; 733600 says share biomom & biodad, 733500 didn't respond in 2006; born in April and May 1958" +207,79,736500,736600,1,2,255,1,NA,"Both say brothers in roster; last surveys were 1990, so no twin or biomom/dad items"
+208,79,756800,756900,1,2,0,0,1,"756800 says DZ in 1994, no twin listed for 756900; both say brothers in roster; both say share biomom & biodad" +209,79,767600,767700,1,2,1,0,1,"767600 says MZ in 1994, no twin listed for 767700; both say brothers in roster; both say share biomom & biodad" +210,79,771700,771800,1,2,255,1,1,"771700 says DZ, 771800 says MZ in 1994; both says brothers in roster; 771700 says shares biomom & biodad, 771800 hasn't responded since 2004" +212,79,774600,774700,1,0,0,0,0,"Both say OtherNonRelative in roster; neither has responded since 1990, so no twin or biomom/dad" +213,79,777100,777200,1,0,0,0,0,"777200 says OtherNonRelative in roster, other invalidly skipped; neither has responded since 1990, so no twin or biomom/dad; born July and August 1957" +214,79,786000,786100,1,2,255,1,1,No twin listed in 1994; both say brothers in roster; both say share biomom& biodad +215,79,812000,812100,1,0,0,0,0,Both say OtherNonRelative in roster; born Jan and Feb 1957; neither acknowledged each other in 1994 twin items or 2006 biomom/dad items +216,79,812000,812300,1,0,0,0,0,Both say OtherNonRelative in roster; born Jan and March 1957; neither acknowledged each other in 1994 twin items or 2006 biomom/dad items +217,79,812100,812300,1,0,0,0,0,Both say OtherNonRelative in roster; born Feb and March 1957; neither acknowledged each other in 1994 twin items or 2006 biomom/dad items +218,79,855700,855800,1,2,0,0,1,"855800 says DZ in 1994, no twin listed for 855700; both say sisters in roster; both say share biomom& biodad" +220,79,871600,871700,1,0,0,0,0,"Both say OtherNonRelative in roster; last surveys were 1990, so no twin or biomom/dad items; born Jan and Feb 1959" +221,79,880700,1223500,1,0,0,0,NA,"Both invalidly skip roster; last surveys were 1990, so no twin or biomom/dad items; born March and Feb 1960; ID numbering is weird" +222,79,883600,883700,1,2,255,1,1,"Both say brother in roster; last surveys were 1990, so no twin or biomom/dad items"
+223,79,899200,899300,1,0,0,1,NA,"No twin listed in 1994; both say brother in roster; 899200 says shares both biomom & biodad, 899300 says shares neither biomom & biodad" +224,79,941000,941100,1,2,255,1,1,"Both say brother in roster; last surveys were 1990, so no twin or biomom/dad items" +225,79,944700,944900,1,0,0,0,0,"Both say OtherNonRelative in roster; last surveys were 1990, so no twin or biomom/dad items; born Apr and May 1957" +226,79,944800,945100,1,0,0,0,0,"Both say OtherNonRelative in roster; last surveys were 1990, so no twin or biomom/dad items" +227,79,950900,951100,1,0,0,1,NA,"950900 invalidly skipped roster, 951100 refused roster; last surveys were 1990, so no twin or biomom/dad items; born May and Apr 1960" +228,79,956200,956400,1,0,0,0,0,"Both say OtherNonRelative in roster; last surveys were 1983 & 1990, so no twin or biomom/dad items; born May and June 1959" +230,79,959200,959400,1,0,0,0,0,"Both say OtherNonRelative in roster; last surveys were 1990, so no twin or biomom/dad items; born May and June 1959" +231,79,971500,971600,1,0,0,0,0,"Both say OtherNonRelative in roster; last surveys were 1990, so no twin or biomom/dad items; born Aug and Sept 1958" +233,79,973700,973800,1,0,0,0,0,"Both say OtherNonRelative in roster; last surveys were 1990, so no twin or biomom/dad items" +234,79,989700,989800,1,2,0,0,1,"989700 says DZ in 1994 and share biomom & biodad in 2006; 989800 hasn't responded since 1993, so no twin or biomom/dad item; both say sisters on roster" +235,79,991100,991200,1,2,0,0,1,"991200 says DZ in 1994, 991100 didn't respond in 1994 ; both say brothers in roster; neither responded in 2006, so no biomom/dad item" +237,79,1008700,1008800,1,2,255,1,1,"Both say sister in roster; last surveys were 1990, so no twin or biomom/dad items" +238,79,1039300,1039500,1,0,0,0,0,"Both say OtherNonRelative in roster; last surveys were 1990, so no twin or biomom/dad items" +239,79,1200100,1200400,1,0,0,0,0,No twin listed in 1994; Both say stepsister in 
roster; 1200400 didn't acknowledge other in biomom/dad items; 1200100 didn't respond in 2006; born Aug and July 1961 +240,79,1227200,1227300,1,0,0,0,0,"Both say sister in roster; last surveys were 1990, so no twin or biomom/dad items; born March and April 1957" +241,79,1232000,1232100,1,2,255,0,1,"Both say sister in roster; last surveys were 1990, so no twin or biomom/dad items" +243,79,598500,598600,1,2,255,1,1,"598500 says MZ, 598600 says DZ in 1994; both says sisters in roster; both say share biomom & biodad" +244,79,179902,179903,2,2,0,0,1,Different Gender +245,79,443101,443102,2,2,255,1,1,The Gen2s never completed a survey. Born 50 years after Gen1 mom (1961 vs 2011) diff --git a/data-public/metadata/tables-97/RosterAssignment.csv b/data-public/metadata/tables-97/RosterAssignment.csv new file mode 100644 index 0000000..c4b2e2c --- /dev/null +++ b/data-public/metadata/tables-97/RosterAssignment.csv @@ -0,0 +1,51 @@ +ID,ResponseLower,ResponseUpper,Freq,Resolved,R,RBoundLower,RBoundUpper,SameGeneration,ShareBiodad,ShareBiomom,ShareBiograndparent,Inconsistent,Notes,ResponseLowerLabel,ResponseUpperLabel +1,-3,-3,67,0,NA,0,1,255,255,255,255,0,,Invalid Skip,Invalid Skip +2,-3,-1,11,0,NA,0,1,255,255,255,255,0,,Invalid Skip,Refusal +3,-3,33,6,1,0,0,0,1,0,0,0,0,,Invalid Skip, +4,-3,36,35,1,0,0,0,255,0,0,0,0,,Invalid Skip, +5,-1,18,1,1,0.125,0.125,0.125,0,0,0,1,0,,Refusal, +6,-1,36,3,1,0,0,0,255,0,0,0,0,,Refusal, +7,1,1,167,1,0,0,0,1,0,0,0,0,,SPOUSE,SPOUSE +8,1,33,1,1,0,0,0,1,0,0,0,0,,SPOUSE, +9,1,36,1,1,0,0,0,1,0,0,0,0,,SPOUSE, +10,1,57,1,1,0,0,0,1,0,0,0,0,,SPOUSE, +11,6,6,1316,0,NA,0.25,1,1,255,255,255,0,,, +12,6,7,2212,0,NA,0.25,1,1,255,255,255,0,,, +13,7,7,1101,0,NA,0.25,1,1,255,255,255,0,,, +14,12,12,1,0,NA,0,0.125,0,0,0,1,1,They cannot both be uncles to each other,, +15,12,17,5,1,0.125,0.125,0.125,0,0,0,1,0,,, +16,12,18,4,1,0.125,0.125,0.125,0,0,0,1,0,,, +17,13,17,9,1,0.125,0.125,0.125,0,0,0,1,0,,, +18,13,18,10,1,0.125,0.125,0.125,0,0,0,1,0,,, 
+19,13,52,1,0,NA,0,0.125,255,0,0,0,1,they don't even agree on the same generation,, +20,13,53,1,0,NA,0,0.125,255,0,0,0,1,they don't even agree on the same generation,, +21,16,16,65,1,0.125,0.125,0.125,1,0,0,255,0,Is 1/8 too high? No items will distinguish half-cousins,, +22,16,36,2,1,0,0,0,1,0,0,0,0,'cousin' is loosely defined by one of them,, +23,16,62,2,1,0.125,0.125,0.125,1,0,0,1,0,,, +24,20,38,1,1,0,0,0,0,0,0,0,0,,, +25,21,38,1,1,0,0,0,0,0,0,0,0,,, +26,26,26,7,1,0,0,0,1,0,0,0,0,,, +27,26,27,18,1,0,0,0,1,0,0,0,0,,, +28,26,58,1,1,0,0,0,1,0,0,0,0,,, +29,27,27,11,1,0,0,0,1,0,0,0,0,,, +30,28,30,1,1,0,0,0,0,0,0,0,0,tight age range for two generations,, +31,32,32,1,1,0,0,0,255,0,0,0,0,,, +32,33,33,5,1,0,0,0,1,0,0,0,0,,, +33,33,36,2,1,0,0,0,1,0,0,0,0,,, +34,34,36,4,1,0,0,0,255,0,0,0,0,,, +35,36,36,137,1,0,0,0,255,0,0,0,0,,, +36,39,39,19,1,0,0,0,1,0,0,0,0,,, +37,39,40,21,1,0,0,0,1,0,0,0,0,,, +38,40,40,4,1,0,0,0,1,0,0,0,0,,, +39,52,52,6,1,0,0,0,1,0,0,0,0,,, +40,52,53,5,1,0,0,0,1,0,0,0,0,,, +41,53,53,2,1,0,0,0,1,0,0,0,0,,, +42,57,58,7,1,0,0,0,1,0,0,0,0,,, +43,59,59,4,1,0,0,0,1,0,0,0,0,,, +44,59,60,2,1,0,0,0,1,0,0,0,0,,, +45,62,62,2,0,NA,0.125,1,1,255,255,1,0,,, +46,62,63,2,0,NA,0.125,1,1,255,255,1,0,,, +47,63,63,3,0,NA,0.125,1,1,0,0,1,0,,, +48,64,64,2,0,NA,0,1,1,255,255,255,0,,, +49,64,65,9,0,NA,0,1,1,255,255,255,0,,, +50,66,66,3,1,0,0,0,1,0,0,0,0,I'm interpreting this as either (a) the sibling's spouse or (b) the in-law's spouse,, diff --git a/data-public/metadata/tables-97/_mapping.csv b/data-public/metadata/tables-97/_mapping.csv new file mode 100644 index 0000000..c208e30 --- /dev/null +++ b/data-public/metadata/tables-97/_mapping.csv @@ -0,0 +1,18 @@ +table_name,schema_name,enum_name,enum_file,c_sharp_type,convert_to_enum +item,Metadata,Item,EnumLookupTables.cs,short,TRUE +#item_97,Metadata,item_97,EnumLookupTables.cs,short,TRUE +LUExtractSource,Enum,ExtractSource,EnumLookupTables.cs,byte,TRUE +LUGender,Enum,Gender,EnumLookupTables.cs,byte,TRUE
+LUMarkerEvidence,Enum,MarkerEvidence,EnumLookupTables.cs,byte,TRUE +LUMarkerType,Enum,MarkerType,EnumLookupTables.cs,byte,TRUE +LUMultipleBirth,Enum,MultipleBirth,EnumLookupTables.cs,byte,TRUE +LURaceCohort,Enum,RaceCohort,EnumLookupTables.cs,byte,TRUE +LURelationshipPath,Enum,RelationshipPath,EnumLookupTables.cs,byte,TRUE +LURosterGen1,Enum,RosterGen1,EnumResponseGen1.cs,short,TRUE +LUSurveySource,Enum,SurveySource,EnumLookupTables.cs,byte,TRUE +LUTristate,Enum,Tristate,EnumLookupTables.cs,byte,TRUE +LUYesNo,Enum,YesNo,EnumLookupTables.cs,short,TRUE +MzManual,Metadata,NA_character,NA_character,NA_character,FALSE +RosterGen1Assignment,Metadata,NA_character,NA_character,NA_character,FALSE +variable,Metadata,NA_character,NA_character,NA_character,FALSE +#variable_97,Metadata,NA_character,NA_character,NA_character,FALSE diff --git a/utility/connectivity.R b/utility/connectivity.R index 5364df6..a3c0639 100644 --- a/utility/connectivity.R +++ b/utility/connectivity.R @@ -1,4 +1,4 @@ -open_dsn_channel_odbc <- function( ) { +open_dsn_channel_odbc_79 <- function( ) { requireNamespace("odbc") channel <- DBI::dbConnect( @@ -11,8 +11,21 @@ open_dsn_channel_odbc <- function( ) { } # channel <- open_dsn_channel_odbc() # DBI::dbDisconnect(channel); rm(channel) +open_dsn_channel_odbc_97 <- function( ) { + requireNamespace("odbc") + + channel <- DBI::dbConnect( + drv = odbc::odbc(), + dsn = "local-nlsy-links-97" + ) + testit::assert("The ODBC channel should open successfully.", exists("channel")) -open_dsn_channel_rodbc <- function( ) { + return( channel ) +} +# channel <- open_dsn_channel_odbc_97() +# DBI::dbDisconnect(channel); rm(channel) + +open_dsn_channel_rodbc_79 <- function( ) { requireNamespace("RODBC") channel <- RODBC::odbcConnect( @@ -26,11 +39,27 @@ open_dsn_channel_rodbc <- function( ) { return( channel ) } +open_dsn_channel_rodbc_97 <- function( ) { + requireNamespace("RODBC") + + channel <- RODBC::odbcConnect( + # Uses Trusted/integrated authentication + dsn =
"local-nlsy-links-97" + # dsn = "BeeNlsLinks", + # uid = "NlsyReadWrite", + # pwd = "nophi" + ) + testit::assert("The ODBC channel should open successfully.", channel != -1L) + + return( channel ) +} # channel <- open_dsn_channel() # RODBC::odbcClose(channel); rm(channel) -database_inventory <- function( ) { +database_inventory <- function( channel ) { + testit::assert_true(exists("channel")) + sql_table <- " ;WITH t_column AS ( SELECT @@ -65,7 +94,7 @@ database_inventory <- function( ) { # ds <- DBI::dbGetQuery(channel, sql_table) # DBI::dbDisconnect(channel);# rm(channel, sql_table) - channel <- open_dsn_channel_rodbc() + #channel <- open_dsn_channel_rodbc() ds <- RODBC::sqlQuery(channel, sql_table, stringsAsFactors=F) # ds_row_count <- RODBC::sqlTables(channel) RODBC::odbcClose(channel); rm(channel, sql_table)