Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Sign in
Toggle navigation
H
Harmonize_Scripts
Project overview
Project overview
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Uncover
Harmonize_Scripts
Commits
83078851
Commit
83078851
authored
Mar 01, 2023
by
GNajeral
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
valid_variables_script2 file added
parent
a118c52e
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
218 additions
and
0 deletions
+218
-0
valid_variables_script2.R
valid_variables_script2.R
+218
-0
No files found.
valid_variables_script2.R
0 → 100644
View file @
83078851
# ---- Session setup ---------------------------------------------------------
# Clear every object from the current workspace before the script runs.
rm(list = ls())

# Ask the user for the project directory and make it the working directory;
# all relative paths used below are resolved against it.
# Example: /Users/gnl/Documents/CTB UPM/UNCOVER/uncover_harmonization
dir_name <- readline("Introduce the name of the directory please: ")
setwd(dir_name)

# Project helper scripts, loaded from the working directory in this order.
# NOTE(review): install_dependencies() and connect(), used further down, are
# presumably defined in these files - confirm.
source("dependency_installer.R")
source("connection_parameters.R")
source("necessary_functions_connection.R")
# source("required_folder_checker.R")
# source("argument_hasher.R")

# Packages handed to install_dependencies().
dep_list <- c(
  "jsonlite", "stringr", "DSI", "DSOpal", "DSLite", "fields",
  "metafor", "ggplot2", "gridExtra", "data.table", "dsBaseClient",
  "openxlsx"
)
install_dependencies(dep_list)
# ---- Harmonisation codebook ------------------------------------------------
# Workbook that holds the codebook; sheets 2 to 11 are read one by one.
codebook_file <- "20220315_Data Harmonisation.xlsb.xlsx"

codebook_demo           <- read.xlsx(codebook_file, sheet = 2)
codebook_com_and_rf     <- read.xlsx(codebook_file, sheet = 3)
codebook_home_med       <- read.xlsx(codebook_file, sheet = 4)
codebook_si_sympt       <- read.xlsx(codebook_file, sheet = 5)
codebook_treatments     <- read.xlsx(codebook_file, sheet = 6)
codebook_labo           <- read.xlsx(codebook_file, sheet = 7)
codebook_complications  <- read.xlsx(codebook_file, sheet = 8)
codebook_imaging_data   <- read.xlsx(codebook_file, sheet = 9)
codebook_lifestyle_diet <- read.xlsx(codebook_file, sheet = 10)
codebook_dates          <- read.xlsx(codebook_file, sheet = 11)

# Drop the columns named X2, X4 and X10 from the lifestyle/diet sheet before
# stacking (presumably so its columns line up with the other sheets - confirm).
codebook_lifestyle_diet <- codebook_lifestyle_diet[
  ,
  !(names(codebook_lifestyle_diet) %in% c("X2", "X4", "X10"))
]

# Stack all sheets, in sheet order, into one codebook table.
codebook <- rbind(
  codebook_demo,
  codebook_com_and_rf,
  codebook_home_med,
  codebook_si_sympt,
  codebook_treatments,
  codebook_labo,
  codebook_complications,
  codebook_imaging_data,
  codebook_lifestyle_diet,
  codebook_dates
)

# One-column data frame (column "col_names") with every harmonised variable
# name found in the codebook.
codebook_col_names <- setNames(
  as.data.frame(codebook$Harmonised.variable.name),
  "col_names"
)
# Harmonised variable codes listed as categorical, grouped by code prefix.
categoric_vars <- c(
  # DMR*
  "DMRGENDR", "DMRBORN", "DMRRETH1", "DMROCCU", "DMRHREDU",
  # DSX*
  "DSXOS", "DSXHO", "DSXIC",
  # TRX*
  "TRXAV", "TRXRIB", "TRXLR", "TRXRM", "TRXIA", "TRXIB", "TRXCH", "TRXAB",
  "TRXCS", "TRXHEP", "TRXAF", "TRXCP", "TRXOT", "TRXECM", "TRXIV", "TRXNIV",
  "TRXNO", "TRXOX", "TRXRR", "TRXTR", "TRXVA", "TRXPE", "TRXPV", "TRXIT",
  "TRXNMB", "TRXAC", "TRXINA", "TRXIS", "TRXIM", "TRXVC", "TRXVD", "TRXZN",
  # CSX*
  "CSXCOT", "CSXCTR",
  # SMX*
  "SMXASAH", "SMXFEA", "SMXCOA", "SMXSTA", "SMXSBA", "SMXRNA", "SMXMYA",
  "SMXARA", "SMXCPA", "SMXAPA", "SMXINA", "SMXNAA", "SMXDIA", "SMXFAA",
  "SMXHEA", "SMXCNA", "SMXACA", "SMXSLA", "SMXTLA", "SMXSYA", "SMXWHA",
  "SMXLYA", "SMXANA", "SMXIWA", "SMXSRA", "SMXBLA",
  # CMX*
  "CMXPRG", "CMXCVD", "CMXCMP", "CMXHT", "CMXDI", "CMXCKD", "CMXCLD",
  "CMXCPD", "CMXASM", "CMXCND", "CMXRHE", "CMXCCI", "CMXCBD", "CMXDE",
  "CMXPU", "CMXST", "CMXLY", "CMXAP",
  # RFX*
  "RFXSM", "RFXFSM", "RFXOB", "RFXTB", "RFXIMD", "RFXHIV", "RFXAIDS",
  "RFXUI", "RFXHC", "RFXONC", "RFXMN",
  # HMR*
  "HMRACI", "HMRARB", "HMRAHO", "HMRNS", "HMROS", "HMRCS", "HMRIS",
  "HMRAV", "HMRAB", "HMRCOV",
  # IMDX*
  "IMDXCT", "IMDXCTCR", "IMDXCTTE", "IMDXCTAB", "IMDXXR", "IMDXPN",
  # COX*
  "COXRD", "COXAR", "COXPM", "COXMOD", "COXPT", "COXEC", "COXSH", "COXIO",
  "COXPE", "COXST", "COXDIC", "COXRIO", "COXKF", "COXHF", "COXBC"
)
#----------------------------------------------------------------------------
# Test if column names are valid
#
# Splits the names found in the first column of `colnames` into those that
# check_valid_name() counts exactly once (valid) and all the others.
#
# Arguments:
#   codebook_param  Not used by this function; kept so existing calls keep
#                   working.
#   colnames        Data frame (or matrix) whose first column holds the
#                   column names to check. It is also passed unchanged to
#                   check_valid_name() as the set of names to count in.
#
# Returns a list with two elements:
#   not_colnames  One string: "The column names:", the rejected names
#                 separated by spaces, and a closing sentence. The sentence
#                 is produced even when no name was rejected.
#   colnames      Character vector with the accepted names, or NULL when no
#                 name was accepted.
check_column_names <- function(codebook_param, colnames) {
  # seq_len() yields an empty index for a zero-row input, where 1:nrow()
  # would iterate over c(1, 0) and fail.
  candidates <- as.character(colnames[seq_len(nrow(colnames)), 1])

  match_counts <- vapply(
    candidates,
    function(candidate) check_valid_name(candidate, colnames),
    numeric(1),
    USE.NAMES = FALSE
  )
  is_valid <- match_counts == 1

  str_res <- paste(
    c(
      "The column names:",
      candidates[!is_valid],
      "are not registered in the harmonized data codebook \n"
    ),
    collapse = " "
  )

  # NULL (not character(0)) when nothing is valid, as callers have always
  # received.
  valid_colnames <- if (any(is_valid)) candidates[is_valid] else NULL

  list("not_colnames" = str_res, "colnames" = valid_colnames)
}
# Test if a single variable name is valid
#
# Arguments:
#   col_name   A single variable name.
#   col_names  The names present in the data: a character vector, or a data
#              frame / list whose cells are names.
#
# Returns 0 when `col_name` is not listed in the global
# `codebook_col_names$col_names` (or is not a single non-missing value);
# otherwise the number of entries of `col_names` that are exactly equal to
# `col_name`.
#
# Entries are compared by exact equality. A regular-expression search would
# also count longer names that merely contain `col_name`, and would treat
# characters such as "." as wildcards.
check_valid_name <- function(col_name, col_names) {
  if (length(col_name) != 1 || is.na(col_name)) {
    return(0)
  }
  if (!(col_name %in% codebook_col_names$col_names)) {
    return(0)
  }

  # Flatten so every cell is compared, whatever container was passed in.
  present_names <- unlist(col_names, use.names = FALSE)
  sum(present_names == col_name, na.rm = TRUE)
}
# Delete the first space character from each element of `x`.
#
# Only the first occurrence per element is removed (sub(), not gsub());
# elements without a space are returned unchanged.
#
# Arguments:
#   x  Character vector (other types are coerced by sub()).
#
# Returns a character vector of the same length as `x`.
remove_space <- function(x) {
  sub(pattern = " ", replacement = "", x = x)
}
# Apply remove_space() to every column of a dataset.
#
# Arguments:
#   ds  Data frame (or list of vectors) to clean.
#
# Returns a data frame built from the cleaned columns.
remove_spaces_from_ds <- function(ds) {
  cleaned_columns <- lapply(X = ds, FUN = remove_space)
  as.data.frame(cleaned_columns)
}
# Tell whether `x` can be read as a number.
#
# The first comma of each element is treated as a decimal separator, so
# "3,5" is accepted as 3.5.
#
# Arguments:
#   x  Value to test, normally a single string.
#
# Returns a single TRUE or FALSE. Empty input (NULL, character(0)) and
# missing values give FALSE. When `x` has several elements the result is
# TRUE only if every element is numeric.
is_number <- function(x) {
  if (length(x) == 0) {
    return(FALSE)
  }

  # Literal replacement of the first comma; base sub() is enough here.
  normalised <- sub(",", ".", as.character(x), fixed = TRUE)

  # A failed conversion is the expected "not a number" outcome, so the
  # coercion warning carries no information for the caller.
  parsed <- suppressWarnings(as.numeric(normalised))

  # all() keeps the result scalar for inputs longer than one element.
  all(!is.na(parsed))
}
# Check that continuous laboratory variables stay inside their codebook range.
#
# For every name in the first column of `valid_columns` that appears in the
# global `codebook_labo` with Variable.type "Continuous", the range is read
# from that same codebook row, the server-side "data" table is subset to the
# rows inside the range, and the variable is reported when the subset is
# shorter than the original column.
#
# Relies on globals: `codebook_labo` (codebook sheet) and `connections`
# (DataSHIELD connections), and on the dsBaseClient functions
# ds.dataFrameSubset(), ds.summary() and ds.length().
#
# Arguments:
#   valid_columns  Data frame whose first column holds the variable names.
#
# Returns a character vector: "" as first element, followed by one message
# per variable with out-of-range values.
check_values_format <- function(valid_columns) {
  res <- ""

  # A data frame without columns has no first column to read.
  if (length(valid_columns) == 0) {
    return(res)
  }

  column_names <- as.character(valid_columns[[1]])
  labo_names <- codebook_labo$Harmonised.variable.name

  for (current_column in column_names) {
    if (is.na(current_column)) {
      next
    }

    # Row of this variable in the laboratory codebook (NA when absent).
    index <- match(current_column, labo_names)
    if (is.na(index)) {
      next
    }
    if (!isTRUE(codebook_labo$Variable.type[index] == "Continuous")) {
      next
    }

    # NOTE(review): assumes the format cell reads "<low>-<high> / ..." with
    # non-negative limits - confirm against the codebook.
    value_format <- strsplit(
      as.character(codebook_labo$Possible.values.format[index]),
      " / "
    )[[1]]
    range_text <- value_format[1]
    low_limit <- suppressWarnings(as.numeric(sub("-.*", "", range_text)))
    high_limit <- suppressWarnings(as.numeric(sub(".*-", "", range_text)))

    # No usable range in the codebook: nothing to compare against.
    if (is.na(low_limit) || is.na(high_limit)) {
      next
    }

    # Keep the rows with value <= high limit ...
    ds.dataFrameSubset(
      df.name = "data",
      V1.name = paste0("data$", current_column),
      V2.name = format(high_limit, scientific = FALSE, trim = TRUE),
      Boolean.operator = "<=",
      newobj = "inRangeHigh",
      keep.NAs = TRUE,
      datasources = connections
    )
    # ... and, among those, the rows with value >= low limit.
    ds.dataFrameSubset(
      df.name = "inRangeHigh",
      V1.name = paste0("inRangeHigh$", current_column),
      V2.name = format(low_limit, scientific = FALSE, trim = TRUE),
      Boolean.operator = ">=",
      newobj = "inRange",
      keep.NAs = TRUE,
      datasources = connections
    )

    range_summary <- ds.summary(
      paste0("inRange$", current_column),
      datasources = connections
    )
    # type = "split" gives one length per study; the first study is compared
    # with the first study's summary so the condition is a single value.
    total_length <- ds.length(
      paste0("data$", current_column),
      type = "split",
      datasources = connections
    )[[1]]

    # Second element of a study's summary is used as the in-range length.
    if (total_length > range_summary[[1]][[2]]) {
      res <- c(
        res,
        paste(
          current_column,
          "does not follow the established format",
          sep = "\n"
        )
      )
    }
  }

  res
}
# ---- Connection ------------------------------------------------------------
# connect() is not defined in this file (presumably it comes from one of the
# sourced helper scripts - confirm). Its result is read as a two-element
# list: the first element is used as the DataSHIELD connections object.
auxConnections <- connect()
connections    <- auxConnections[[1]]
inp            <- auxConnections[[2]]

# Connection to the database
ds.dim(x = "data", datasources = connections)
colnames <- ds.colnames(x = "data")
colnames

# Manual example of the range check for a single variable, kept for reference:
#
# ds.dataFrameSubset(df.name = "data",
#                    V1.name = "data$LBXAPTTA",
#                    V2.name = "130",
#                    Boolean.operator = "<=",
#                    newobj = "inRangeHigh",
#                    keep.NAs = TRUE,
#                    datasources = connections)
#
# lengthHigh <- ds.length(x='inRangeHigh$LBXAPTTA', datasources = connections)
#
#
# ds.dataFrameSubset(df.name = "inRangeHigh",
#                    V1.name = "inRangeHigh$LBXAPTTA",
#                    V2.name = "11",
#                    Boolean.operator = ">=",
#                    newobj = "inRange",
#                    keep.NAs = TRUE,
#                    datasources = connections)
#
# lengthBuenos <- ds.length(x='inRange$LBXAPTTA', datasources = connections)
#
# summary <- ds.summary("inRange$LBXAPTTA")

#----------------------------------------------------------------------------
# Check valid column names
datastructure_name <- "data"

# Column names of the server-side table, as a data frame.
data_colnames <- as.data.frame(
  ds.colnames(x = datastructure_name, datasources = connections)
)

# Keep only the names accepted by check_column_names().
check_valid_columns <- check_column_names(codebook, data_colnames)
valid_columns <- as.data.frame(check_valid_columns$colnames)

# Range-check the accepted columns and show the resulting messages.
res <- ""
res <- check_values_format(valid_columns)
print(res)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment