Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Sign in
Toggle navigation
H
Harmonize_Scripts
Project overview
Project overview
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Uncover
Harmonize_Scripts
Commits
097715bb
Commit
097715bb
authored
Apr 11, 2023
by
GNajeral
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fixed local script
parent
a89d6844
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
147 additions
and
144 deletions
+147
-144
valid_variables_script_local.R
valid_variables_script_local.R
+147
-144
No files found.
valid_variables_script_local.R
View file @
097715bb
...
...
@@ -100,153 +100,163 @@ check_valid_name <- function(col_name){
}
# check_valid_values_continuous <- function(colname , codebook_param , column){
#
# column <- column[column != "."]
# possible_values_format <- codebook_param$Possible.values.format[codebook_param$Harmonised.variable.name == colname]
# possible_values_list = str_split(possible_values_format , "/")[[1]]
#
# range_as_str <- str_trim(possible_values_list[1])
# missing_value_format <- str_trim(str_trim(possible_values_list[2]))
#
# separate_range <- str_split(range_as_str , "-")[[1]]
# min_value <- strtoi(separate_range[1])
# max_value <- strtoi(separate_range[2])
#
# failing_values <- column[column < min_value | column > max_value]
# number_of_failing_values <- length(failing_values[!is.na(failing_values)])
#
# str_res <- ""
# if (number_of_failing_values == 0)
# str_res <- "No failing values"
# else{
# failing_values <- failing_values[!is.na(failing_values)]
# failing_value_counts <- table(failing_values)
#
# str_res <- paste(colname, "has failing values:")
#
# for (i in seq_along(failing_value_counts)) {
# value <- names(failing_value_counts)[i]
# count <- failing_value_counts[i]
# str_res <- paste(str_res, paste(value, "(", count, "times)", collapse = " "), sep = " ")
# }
#
# str_res <- paste(str_res, "should be in range", range_as_str, "(continuous)", sep = " ")
# }
#
# return(str_res)
# }
#
#
#
# check_valid_values_categorical <- function(colname, codebook_param, column) {
# column <- column[column != "."]
# possible_values_format <- codebook_param$Possible.values.format[codebook_param$Harmonised.variable.name == colname]
# possible_values_list <- str_split(possible_values_format, "/")[[1]]
#
# possible_values_list <- lapply(possible_values_list, str_trim)
#
# str_res <- ""
# min_value <- 0
# max_value <- 0
# if (length(possible_values_list[[1]]) == 2) {
# separate_range <- str_split(possible_values_list[[1]][1], "-")[[1]]
# min_value <- strtoi(separate_range[1])
# max_value <- strtoi(separate_range[2])
# } else {
# possible_values_list <- lapply(possible_values_list, strtoi)[[1]]
# min_value <- possible_values_list[1]
# max_value <- possible_values_list[length(possible_values_list) - 1]
# }
#
# failing_values <- column[column < min_value | column > max_value]
# number_of_failing_values <- length(failing_values[!is.na(failing_values)])
#
# if (number_of_failing_values == 0) {
# str_res <- "No failing values"
# } else {
# range_as_str <- paste(min_value, "-", max_value, " (categorical)")
# failing_values <- failing_values[!is.na(failing_values)]
# failing_value_counts <- table(failing_values)
#
# str_res <- paste(colname, "has failing values:")
#
# for (i in seq_along(failing_value_counts)) {
# value <- names(failing_value_counts)[i]
# count <- failing_value_counts[i]
# str_res <- paste(str_res, paste(value, "(", count, "times)", collapse = " "), sep = " ")
# }
#
# str_res <- paste(str_res, "should be in range", range_as_str, sep = " ")
# }
#
# return(str_res)
# }
#
# check_valid_values_binary <- function(colname, column) {
# column <- column[column != "."]
# failing_values <- column[column < 0 | column > 1]
# number_of_failing_values <- length(failing_values[!is.na(failing_values)])
#
# str_res <- ""
# if (number_of_failing_values == 0)
# str_res <- "No failing values"
# else {
# range_as_str <- "0-1 (binary)"
# failing_values <- failing_values[!is.na(failing_values)]
# failing_value_counts <- table(failing_values)
#
# str_res <- paste(colname, "has failing values:")
#
# for (i in seq_along(failing_value_counts)) {
# value <- names(failing_value_counts)[i]
# count <- failing_value_counts[i]
# str_res <- paste(str_res, paste(value, "(", count, "times)", collapse = " "), sep = " ")
# }
#
# str_res <- paste(str_res, "should be in range", range_as_str, sep = " ")
# }
#
# return(str_res)
# }
# check_valid_values_continuous <- function(colname , codebook_param , column){
#
# column <- as.numeric(column[column != "."])
# possible_values_format <- codebook_param$Possible.values.format[codebook_param$Harmonised.variable.name == colname]
#
# value_format <- strsplit(possible_values_format, " / ")[[1]]
# min_value <- str_trim(gsub(",", ".", (sub("-.*", "", value_format[1]))))
# max_value <- str_trim(gsub(",", ".", (sub(".*-", "", value_format[1]))))
# if(min_value == ""){
# min_value <- str_trim(sub(",.*", "", value_format[1]))
# max_value <- str_trim(sub(".*,", "", value_format[1]))
# }
# min_value <- as.double(min_value)
# max_value <- as.double(max_value)
# print(colname)
# print(min_value)
# print(max_value)
#
# failing_values <- column[column < min_value | column > max_value]
# number_of_failing_values <- length(failing_values[!is.na(failing_values)])
#
# str_res <- ""
# if (number_of_failing_values == 0)
# str_res <- "No failing values"
# else{
# failing_values <- failing_values[!is.na(failing_values)]
# failing_value_counts <- table(failing_values)
#
# str_res <- paste(colname, "has failing values:")
#
# for (i in seq_along(failing_value_counts)) {
# value <- names(failing_value_counts)[i]
# count <- failing_value_counts[i]
# str_res <- paste(str_res, paste(value, "(", count, "times)", collapse = " "), sep = " ")
# }
#
# str_res <- paste(str_res, "should be in range", value_format, "(continuous)", sep = " ")
# }
#
# return(str_res)
# }
#
#
#
# check_valid_values_categorical <- function(colname, codebook_param, column) {
# column <- as.numeric(column[column != "."])
# possible_values_format <- codebook_param$Possible.values.format[codebook_param$Harmonised.variable.name == colname]
# possible_values_list <- str_split(possible_values_format, "/")[[1]]
#
# possible_values_list <- lapply(possible_values_list, str_trim)
#
# str_res <- ""
# min_value <- 0
# max_value <- 0
# if (length(possible_values_list[[1]]) == 2) {
# separate_range <- str_split(possible_values_list[[1]][1], "-")[[1]]
# min_value <- strtoi(separate_range[1])
# max_value <- strtoi(separate_range[2])
# } else {
# possible_values_list <- lapply(possible_values_list, strtoi)[[1]]
# min_value <- possible_values_list[1]
# max_value <- possible_values_list[length(possible_values_list) - 1]
# }
#
# failing_values <- column[column < min_value | column > max_value]
# number_of_failing_values <- length(failing_values[!is.na(failing_values)])
#
# if (number_of_failing_values == 0) {
# str_res <- "No failing values"
# } else {
# range_as_str <- paste(min_value, "-", max_value, " (categorical)")
# failing_values <- failing_values[!is.na(failing_values)]
# failing_value_counts <- table(failing_values)
#
# str_res <- paste(colname, "has failing values:")
#
# for (i in seq_along(failing_value_counts)) {
# value <- names(failing_value_counts)[i]
# count <- failing_value_counts[i]
# str_res <- paste(str_res, paste(value, "(", count, "times)", collapse = " "), sep = " ")
# }
#
# str_res <- paste(str_res, "should be in range", range_as_str, sep = " ")
# }
#
# return(str_res)
# }
#
# check_valid_values_binary <- function(colname, column) {
# column <- as.numeric(column[column != "."])
# failing_values <- column[column < 0 | column > 1]
# number_of_failing_values <- length(failing_values[!is.na(failing_values)])
#
# str_res <- ""
# if (number_of_failing_values == 0)
# str_res <- "No failing values"
# else {
# range_as_str <- "0-1 (binary)"
# failing_values <- failing_values[!is.na(failing_values)]
# failing_value_counts <- table(failing_values)
#
# str_res <- paste(colname, "has failing values:")
#
# for (i in seq_along(failing_value_counts)) {
# value <- names(failing_value_counts)[i]
# count <- failing_value_counts[i]
# str_res <- paste(str_res, paste(value, "(", count, "times)", collapse = " "), sep = " ")
# }
#
# str_res <- paste(str_res, "should be in range", range_as_str, sep = " ")
# }
#
# return(str_res)
# }
#' Check a continuous column against the range declared in the codebook
#'
#' Looks up the `Possible.values.format` entry whose
#' `Harmonised.variable.name` equals `colname`, takes the text before the
#' first " / " as the allowed range, and counts the values of `column` that
#' fall outside it.
#'
#' The range text is read as "min-max"; a comma inside a bound is treated as
#' a decimal mark. When nothing precedes the first "-" (the text starts with
#' a minus sign), the text is read as "min,max" instead.
#'
#' @param colname Name of the harmonised variable being checked.
#' @param codebook_param Data frame with the columns
#'   `Harmonised.variable.name` and `Possible.values.format`.
#' @param column Values of the variable. Entries equal to "." are dropped
#'   before the check.
#' @return "No failing values", or a message giving the number of values
#'   outside the range together with the range itself.
check_valid_values_continuous <- function(colname, codebook_param, column) {
  # as.numeric() on a factor yields the internal level codes rather than the
  # labelled values, so factors are converted through character first.
  if (is.factor(column)) {
    column <- as.character(column)
  }
  column <- as.numeric(column[column != "."])

  possible_values_format <- codebook_param$Possible.values.format[
    codebook_param$Harmonised.variable.name == colname
  ]
  value_format <- strsplit(as.character(possible_values_format), " / ")[[1]]
  range_text <- value_format[1]

  # "[\\h\\v]" makes trimws() strip any horizontal/vertical whitespace, not
  # only the ASCII blanks of its default.
  trim <- function(x) trimws(x, whitespace = "[\\h\\v]")

  min_value <- trim(gsub(",", ".", sub("-.*", "", range_text)))
  max_value <- trim(gsub(",", ".", sub(".*-", "", range_text)))
  if (min_value == "") {
    # Nothing before the first "-": the lower bound is negative and the two
    # bounds are separated by a comma.
    min_value <- trim(sub(",.*", "", range_text))
    max_value <- trim(sub(".*,", "", range_text))
  }
  min_value <- as.double(min_value)
  max_value <- as.double(max_value)

  # Console trace of the variable and the bounds that were parsed for it.
  print(colname)
  print(min_value)
  print(max_value)

  failing_values <- column[column < min_value | column > max_value]
  # NA entries (missing or non-numeric input) are not counted as failures.
  number_of_failing_values <- sum(!is.na(failing_values))

  if (number_of_failing_values == 0) {
    return("No failing values")
  }

  range_as_str <- paste(min_value, "-", max_value, "(continuous)")
  str_res <- paste(colname, "has", number_of_failing_values, "failing values")
  paste(str_res, "should be in range", range_as_str, sep = " ")
}
check_valid_values_categorical
<-
function
(
colname
,
codebook_param
,
column
)
{
column
<-
column
[
column
!=
"."
]
column
<-
as.numeric
(
column
[
column
!=
"."
])
possible_values_format
<-
codebook_param
$
Possible.values.format
[
codebook_param
$
Harmonised.variable.name
==
colname
]
possible_values_list
<-
str_split
(
possible_values_format
,
"/"
)[[
1
]]
possible_values_list
<-
lapply
(
possible_values_list
,
str_trim
)
str_res
<-
""
min_value
<-
0
max_value
<-
0
...
...
@@ -259,43 +269,40 @@ check_valid_values_categorical <- function(colname, codebook_param, column) {
min_value
<-
possible_values_list
[
1
]
max_value
<-
possible_values_list
[
length
(
possible_values_list
)
-
1
]
}
failing_values
<-
column
[
column
<
min_value
|
column
>
max_value
]
number_of_failing_values
<-
length
(
failing_values
[
!
is.na
(
failing_values
)])
if
(
number_of_failing_values
==
0
)
{
str_res
<-
"No failing values"
}
else
{
range_as_str
<-
paste
(
min_value
,
"-"
,
max_value
,
" (categorical)"
)
str_res
<-
paste
(
colname
,
"has"
,
number_of_failing_values
,
"failing values"
)
str_res
<-
paste
(
str_res
,
"should be in range"
,
range_as_str
,
sep
=
" "
)
}
return
(
str_res
)
}
#' Check that a binary column only holds values between 0 and 1
#'
#' Drops the entries equal to ".", converts the rest to numeric and counts
#' the values that are below 0 or above 1.
#'
#' @param colname Name of the variable, used in the returned message.
#' @param column Values of the variable.
#' @return "No failing values", or a message giving the number of values
#'   outside 0-1.
check_valid_values_binary <- function(colname, column) {
  # as.numeric() on a factor yields the internal level codes rather than the
  # labelled values, so factors are converted through character first.
  if (is.factor(column)) {
    column <- as.character(column)
  }
  column <- as.numeric(column[column != "."])

  # NOTE(review): this is a range test, so a value such as 0.5 passes;
  # confirm whether only exactly 0 and 1 should be accepted.
  failing_values <- column[column < 0 | column > 1]
  # NA entries (missing or non-numeric input) are not counted as failures.
  number_of_failing_values <- sum(!is.na(failing_values))

  if (number_of_failing_values == 0) {
    return("No failing values")
  }

  range_as_str <- "0-1 (binary)"
  str_res <- paste(colname, "has", number_of_failing_values, "failing values")
  paste(str_res, "should be in range", range_as_str, sep = " ")
}
check_valid_values
<-
function
(
valid_colnames
,
codebook_param
){
res
<-
""
...
...
@@ -307,13 +314,8 @@ check_valid_values <- function(valid_colnames, codebook_param){
next
}
#if("DMRBORN" == name | grepl("DAT", name, fixed=TRUE) | grepl("ISO", name , fixed=TRUE) | grepl("BEF", name, fixed=TRUE)){
# next
#}
column
<-
valid_colnames
[,
i
]
# This fails if your codebook is not the same as new_harmon.csv
column_type
<-
codebook_param
$
Variable.type
[
codebook_param
$
Harmonised.variable.name
==
name
]
if
(
is.na
(
column_type
)
)
{
...
...
@@ -334,7 +336,7 @@ check_valid_values <- function(valid_colnames, codebook_param){
}
)
if
(
result
!=
"No failing values"
){
if
(
any
(
result
!=
"No failing values"
)
){
res
<-
paste
(
res
,
result
,
sep
=
"\n"
)
}
}
...
...
@@ -344,6 +346,7 @@ check_valid_values <- function(valid_colnames, codebook_param){
}
data_colnames
<-
as.data.frame
(
colnames
(
harmonized_data
))
check_valid_columns
<-
check_column_names
(
data_colnames
)
...
...
@@ -359,4 +362,4 @@ result <- ""
result
<-
check_valid_values
(
valid_colnames_with_data
,
codebook
)
print
(
columns_not_valid
)
cat
(
result
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment