Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Sign in
Toggle navigation
H
Harmonize_Scripts
Project overview
Project overview
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Uncover
Harmonize_Scripts
Commits
3d03e9b5
Commit
3d03e9b5
authored
Feb 09, 2023
by
Pepe Márquez Romero
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
cambiando el codebook y empezando a cambiar el analisis de los valores
parent
3a586839
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
63 additions
and
82 deletions
+63
-82
valid_variables_script.R
valid_variables_script.R
+63
-82
No files found.
valid_variables_script.R
View file @
3d03e9b5
...
...
@@ -4,22 +4,16 @@ dir_name <- readline("Introduce the name of the directory please: ")
setwd
(
dir_name
)
source
(
"required_folder_checker.R"
)
source
(
"argument_hasher.R"
)
source
(
"dependency_installer.R"
)
dep_list
=
c
(
"jsonlite"
,
"stringr"
,
"DSI"
,
"DSOpal"
,
"DSLite"
,
"fields"
,
"metafor"
,
"ggplot2"
,
"gridExtra"
,
"data.table"
,
"dsBaseClient"
,
"openxlsx"
)
install_dependencies
(
dep_list
)
setwd
(
dir_name
)
#source("connection_parameters.R")
#source("necessary_functions_connection.R")
codebook
<-
read.csv
(
"
harmon.csv"
,
sep
=
";
"
)
codebook
<-
read.csv
(
"
new_harmon.csv"
,
sep
=
",
"
)
codebook_col_names
<-
as.data.frame
(
codebook
$
Harmonised.variable.name
)
...
...
@@ -47,18 +41,22 @@ check_column_names <- function(col_names){
str_res
<-
"The column names:"
valid_colnames
<-
c
()
repeated_colnames
<-
c
()
for
(
i
in
1
:
(
nrow
(
col_names
))){
col_name
<-
col_names
[
i
,
1
]
if
(
!
check_valid_name
(
col_name
)){
number_of_column
<-
check_valid_name
(
col_name
)
if
(
number_of_column
==
0
){
str_res
<-
paste
(
str_res
,
col_name
,
sep
=
" "
)
}
else
{
}
else
if
(
number_of_column
==
1
)
{
valid_colnames
=
c
(
valid_colnames
,
col_name
)
}
else
{
repeated_colnames
=
c
(
repeated_colnames
,
col_name
)
}
}
str_res
<-
paste
(
str_res
,
"are not registered in the harmonized data codebook \n"
,
sep
=
" "
)
new_list
<-
list
(
"not_colnames"
=
str_res
,
"colnames"
=
valid_colnames
)
new_list
<-
list
(
"not_colnames"
=
str_res
,
"colnames"
=
valid_colnames
,
"repeated_colnames"
=
repeated_colnames
)
return
(
new_list
)
}
...
...
@@ -66,10 +64,13 @@ check_column_names <- function(col_names){
#Test if a single variable name is valid
check_valid_name
<-
function
(
col_name
){
valid
<-
FALSE
valid
<-
0
if
(
col_name
%in%
codebook_col_names
$
col_names
)
valid
<-
TRUE
if
(
col_name
%in%
codebook_col_names
$
col_names
){
valid
<-
length
(
grep
(
col_name
,
names
(
harmonized_data
)))
}
return
(
valid
)
...
...
@@ -98,7 +99,7 @@ is_number <- function(x){
x
<-
str_replace
(
x
,
","
,
"."
)
aux
<-
as.numeric
(
x
)
if
(
!
is.na
(
aux
))
res
<-
TRUE
...
...
@@ -129,7 +130,7 @@ check_values_not_categoric <- function(values, colname){
if
(
is.null
(
value
)){
res
<-
TRUE
}
else
if
(
value
==
"NA"
|
value
==
"nan"
|
value
==
"."
)
res
<-
TRUE
else
{
...
...
@@ -376,7 +377,7 @@ error_message <- function(colname, invalid_values){
}
check_valid_values
<-
function
(){
check_valid_values
<-
function
(
valid_colnames
){
invalid_name_list
<-
c
()
cannot_analyse_list
<-
c
()
...
...
@@ -388,80 +389,59 @@ check_valid_values <- function(){
k
<-
1
for
(
i
in
1
:
(
nrow
(
valid_colnames
))){
name
<-
names
(
valid_colnames_with_data
)[
i
]
if
(
"DMRBORN"
==
name
|
grepl
(
"DAT"
,
colname
,
fixed
=
TRUE
)
|
"ISO"
==
name
|
"BEF"
==
name
){
next
}
column
<-
valid_colnames
[,
i
]
data_table
=
"empty"
data_table
<-
as.data.frame
(
table
(
column
))
if
(
!
grepl
(
"DMRBORN"
,
valid_colnames
[
i
,
1
],
fixed
=
TRUE
)
&
(
!
grepl
(
"DAT"
,
valid_colnames
[
i
,
1
],
fixed
=
TRUE
))
&
(
!
grepl
(
"ISO"
,
valid_colnames
[
i
,
1
],
fixed
=
TRUE
))
&
(
!
grepl
(
"BEF"
,
valid_colnames
[
i
,
1
],
fixed
=
TRUE
))){
values
<-
row.names
(
data_table
)
numeric_col
<-
paste
(
valid_colnames
[,
i
],
"_numeric"
,
sep
=
""
)
if
(
name
%in%
categoric_vars
){
column
<-
"data$"
column
<-
paste
(
column
,
valid_colnames
[
i
,
1
],
sep
=
""
)
#is_numeric <- grepl("numeric",valid_colnames[i,1], fixed=TRUE)
has_numeric
<-
numeric_col
%in%
valid_colnames
$
`valid_data_colnames(data_colnames)`
tryCatch
(
error
=
function
(
cnd
)
{
print
(
"Unable to analyse data"
)
res
<-
FALSE
},
data_table
<-
as.data.frame
(
table
(
column
))
)
if
(
!
has_numeric
)
missing_numeric
<-
c
(
missing_numeric
,
valid_colnames
[
i
,
1
])
if
(
data_table
==
"empty"
){
cannot_analyse_list
<-
c
(
cannot_analyse_list
,
valid_colnames
[
i
,
1
])
}
else
{
if
(
data_table
[[
1
]]
==
"All studies failed for reasons identified below"
)
values
<-
get_values_from_quantiles
(
column
)
else
values
<-
row.names
(
data_table
)
if
(
!
check_values_categoric
(
values
,
valid_colnames
[
i
,
1
])){
numeric_col
<-
paste
(
valid_colnames
[
i
,
1
],
"_numeric"
,
sep
=
""
)
print
(
"Wrong categoric value:"
)
print
(
valid_colnames
[
i
,
1
])
if
(
valid_colnames
[
i
,
1
]
%in%
categoric_vars
){
#is_numeric <- grepl("numeric",valid_colnames[i,1], fixed=TRUE)
has_numeric
<-
numeric_col
%in%
valid_colnames
$
`valid_data_colnames(data_colnames)`
if
(
!
has_numeric
)
missing_numeric
<-
c
(
missing_numeric
,
valid_colnames
[
i
,
1
])
if
(
data_table
[[
1
]]
==
"All studies failed for reasons identified below"
){
cannot_analyse_list
<-
c
(
cannot_analyse_list
,
valid_colnames
[
i
,
1
])
}
else
if
(
!
check_values_categoric
(
values
,
valid_colnames
[
i
,
1
])){
print
(
"Wrong categoric value:"
)
print
(
valid_colnames
[
i
,
1
])
wrong_categoric
<-
c
(
wrong_categoric
,
valid_colnames
[
i
,
1
])
wrong_categoric_values
[[
k
]]
<-
values
k
<-
k
+1
}
}
else
{
if
(
grepl
(
"numeric"
,
valid_colnames
[
i
,
1
],
fixed
=
TRUE
))
new_colname
<-
strsplit
(
x
=
valid_colnames
[
i
,
1
],
split
=
"_"
)[[
1
]][
1
]
else
new_colname
<-
valid_colnames
[
i
,
1
]
valid
<-
check_values_not_categoric
(
values
,
new_colname
)
if
(
FALSE
%in%
valid
){
invalid_name_list
<-
c
(
invalid_name_list
,
valid_colnames
[
i
,
1
])
invalid_values_list
[[
j
]]
<-
values
j
<-
j
+1
}
}
wrong_categoric
<-
c
(
wrong_categoric
,
valid_colnames
[
i
,
1
])
wrong_categoric_values
[[
k
]]
<-
values
k
<-
k
+1
}
}
else
{
if
(
grepl
(
"numeric"
,
valid_colnames
[
i
,
1
],
fixed
=
TRUE
))
new_colname
<-
strsplit
(
x
=
valid_colnames
[
i
,
1
],
split
=
"_"
)[[
1
]][
1
]
else
new_colname
<-
valid_colnames
[
i
,
1
]
valid
<-
check_values_not_categoric
(
values
,
new_colname
)
if
(
FALSE
%in%
valid
){
invalid_name_list
<-
c
(
invalid_name_list
,
valid_colnames
[
i
,
1
])
invalid_values_list
[[
j
]]
<-
values
j
<-
j
+1
}
}
}
missing_numeric
...
...
@@ -476,7 +456,7 @@ check_valid_values <- function(){
res
<-
paste
(
res
,
notify_unable_analyse
(
cannot_analyse_list
),
sep
=
"\n"
)
}
...
...
@@ -505,10 +485,11 @@ columns_not_valid <- check_valid_columns$not_colnames
valid_colnames
<-
as.data.frame
(
check_valid_columns
$
colnames
)
names
(
valid_colnames
)
=
c
(
"valid_colnames"
)
valid_colnames_with_data
<-
subset
(
harmonized_data
,
select
=
valid_colnames
$
valid_colnames
)
result
<-
""
result
<-
check_valid_values
()
result
<-
check_valid_values
(
valid_colnames_with_data
)
print
(
check_valid_columns
)
#datashield.logout(connections)
cat
(
result
)
...
...
@@ -525,4 +506,4 @@ cat(check_valid_columns,file=file_name,sep="\n")
cat
(
result
,
file
=
file_name
,
append
=
TRUE
)
#datashield.logout(connections)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment