Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Sign in
Toggle navigation
H
Harmonize_Scripts
Project overview
Project overview
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Uncover
Harmonize_Scripts
Commits
3d03e9b5
Commit
3d03e9b5
authored
Feb 09, 2023
by
Pepe Márquez Romero
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
cambiando el codebook y empezando a cambiar el analisis de los valores
parent
3a586839
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
63 additions
and
82 deletions
+63
-82
valid_variables_script.R
valid_variables_script.R
+63
-82
No files found.
valid_variables_script.R
View file @
3d03e9b5
...
...
@@ -4,22 +4,16 @@ dir_name <- readline("Introduce the name of the directory please: ")
setwd
(
dir_name
)
source
(
"required_folder_checker.R"
)
source
(
"argument_hasher.R"
)
source
(
"dependency_installer.R"
)
dep_list
=
c
(
"jsonlite"
,
"stringr"
,
"DSI"
,
"DSOpal"
,
"DSLite"
,
"fields"
,
"metafor"
,
"ggplot2"
,
"gridExtra"
,
"data.table"
,
"dsBaseClient"
,
"openxlsx"
)
install_dependencies
(
dep_list
)
setwd
(
dir_name
)
#source("connection_parameters.R")
#source("necessary_functions_connection.R")
codebook
<-
read.csv
(
"
harmon.csv"
,
sep
=
";
"
)
codebook
<-
read.csv
(
"
new_harmon.csv"
,
sep
=
",
"
)
codebook_col_names
<-
as.data.frame
(
codebook
$
Harmonised.variable.name
)
...
...
@@ -47,18 +41,22 @@ check_column_names <- function(col_names){
str_res
<-
"The column names:"
valid_colnames
<-
c
()
repeated_colnames
<-
c
()
for
(
i
in
1
:
(
nrow
(
col_names
))){
col_name
<-
col_names
[
i
,
1
]
if
(
!
check_valid_name
(
col_name
)){
number_of_column
<-
check_valid_name
(
col_name
)
if
(
number_of_column
==
0
){
str_res
<-
paste
(
str_res
,
col_name
,
sep
=
" "
)
}
else
{
}
else
if
(
number_of_column
==
1
)
{
valid_colnames
=
c
(
valid_colnames
,
col_name
)
}
else
{
repeated_colnames
=
c
(
repeated_colnames
,
col_name
)
}
}
str_res
<-
paste
(
str_res
,
"are not registered in the harmonized data codebook \n"
,
sep
=
" "
)
new_list
<-
list
(
"not_colnames"
=
str_res
,
"colnames"
=
valid_colnames
)
new_list
<-
list
(
"not_colnames"
=
str_res
,
"colnames"
=
valid_colnames
,
"repeated_colnames"
=
repeated_colnames
)
return
(
new_list
)
}
...
...
@@ -66,10 +64,13 @@ check_column_names <- function(col_names){
#Test if a single variable name is valid
check_valid_name
<-
function
(
col_name
){
valid
<-
FALSE
valid
<-
0
if
(
col_name
%in%
codebook_col_names
$
col_names
){
if
(
col_name
%in%
codebook_col_names
$
col_names
)
valid
<-
TRUE
valid
<-
length
(
grep
(
col_name
,
names
(
harmonized_data
)))
}
return
(
valid
)
...
...
@@ -376,7 +377,7 @@ error_message <- function(colname, invalid_values){
}
check_valid_values
<-
function
(){
check_valid_values
<-
function
(
valid_colnames
){
invalid_name_list
<-
c
()
cannot_analyse_list
<-
c
()
...
...
@@ -388,37 +389,20 @@ check_valid_values <- function(){
k
<-
1
for
(
i
in
1
:
(
nrow
(
valid_colnames
))){
name
<-
names
(
valid_colnames_with_data
)[
i
]
if
(
"DMRBORN"
==
name
|
grepl
(
"DAT"
,
colname
,
fixed
=
TRUE
)
|
"ISO"
==
name
|
"BEF"
==
name
){
next
}
data_table
=
"empty"
if
(
!
grepl
(
"DMRBORN"
,
valid_colnames
[
i
,
1
],
fixed
=
TRUE
)
&
(
!
grepl
(
"DAT"
,
valid_colnames
[
i
,
1
],
fixed
=
TRUE
))
&
(
!
grepl
(
"ISO"
,
valid_colnames
[
i
,
1
],
fixed
=
TRUE
))
&
(
!
grepl
(
"BEF"
,
valid_colnames
[
i
,
1
],
fixed
=
TRUE
))){
column
<-
"data$"
column
<-
paste
(
column
,
valid_colnames
[
i
,
1
],
sep
=
""
)
column
<-
valid_colnames
[,
i
]
tryCatch
(
error
=
function
(
cnd
)
{
print
(
"Unable to analyse data"
)
res
<-
FALSE
},
data_table
<-
as.data.frame
(
table
(
column
))
)
if
(
data_table
==
"empty"
){
cannot_analyse_list
<-
c
(
cannot_analyse_list
,
valid_colnames
[
i
,
1
])
}
else
{
if
(
data_table
[[
1
]]
==
"All studies failed for reasons identified below"
)
values
<-
get_values_from_quantiles
(
column
)
else
values
<-
row.names
(
data_table
)
numeric_col
<-
paste
(
valid_colnames
[
i
,
1
],
"_numeric"
,
sep
=
""
)
numeric_col
<-
paste
(
valid_colnames
[,
i
],
"_numeric"
,
sep
=
""
)
if
(
valid_colnames
[
i
,
1
]
%in%
categoric_vars
){
if
(
name
%in%
categoric_vars
){
#is_numeric <- grepl("numeric",valid_colnames[i,1], fixed=TRUE)
has_numeric
<-
numeric_col
%in%
valid_colnames
$
`valid_data_colnames(data_colnames)`
...
...
@@ -427,11 +411,7 @@ check_valid_values <- function(){
missing_numeric
<-
c
(
missing_numeric
,
valid_colnames
[
i
,
1
])
if
(
data_table
[[
1
]]
==
"All studies failed for reasons identified below"
){
cannot_analyse_list
<-
c
(
cannot_analyse_list
,
valid_colnames
[
i
,
1
])
}
else
if
(
!
check_values_categoric
(
values
,
valid_colnames
[
i
,
1
])){
if
(
!
check_values_categoric
(
values
,
valid_colnames
[
i
,
1
])){
print
(
"Wrong categoric value:"
)
print
(
valid_colnames
[
i
,
1
])
...
...
@@ -458,10 +438,10 @@ check_valid_values <- function(){
}
}
}
}
missing_numeric
...
...
@@ -505,10 +485,11 @@ columns_not_valid <- check_valid_columns$not_colnames
valid_colnames
<-
as.data.frame
(
check_valid_columns
$
colnames
)
names
(
valid_colnames
)
=
c
(
"valid_colnames"
)
valid_colnames_with_data
<-
subset
(
harmonized_data
,
select
=
valid_colnames
$
valid_colnames
)
result
<-
""
result
<-
check_valid_values
()
result
<-
check_valid_values
(
valid_colnames_with_data
)
print
(
check_valid_columns
)
#datashield.logout(connections)
cat
(
result
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment