Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Sign in
Toggle navigation
H
Harmonize_Scripts
Project overview
Project overview
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Uncover
Harmonize_Scripts
Commits
ef256262
Commit
ef256262
authored
Mar 06, 2023
by
GNajeral
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
First working script
parent
841190e3
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
34 additions
and
22 deletions
+34
-22
valid_variables_script2.R
valid_variables_script2.R
+34
-22
No files found.
valid_variables_script2.R
View file @
ef256262
...
...
@@ -49,9 +49,6 @@ codebook_col_names <- as.data.frame(codebook$Harmonised.variable.name)
names
(
codebook_col_names
)
<-
c
(
"col_names"
)
categoric_vars
=
c
(
"DMRGENDR"
,
"DMRBORN"
,
"DMRRETH1"
,
"DMROCCU"
,
"DMRHREDU"
,
"DSXOS"
,
"DSXHO"
,
"DSXIC"
,
"TRXAV"
,
"TRXRIB"
,
"TRXLR"
,
"TRXRM"
,
"TRXIA"
,
"TRXIB"
,
"TRXCH"
,
"TRXAB"
,
"TRXCS"
,
"TRXHEP"
,
"TRXAF"
,
"TRXCP"
,
"TRXOT"
,
"TRXECM"
,
"TRXIV"
,
"TRXNIV"
,
"TRXNO"
,
"TRXOX"
,
"TRXRR"
,
"TRXTR"
,
"TRXVA"
,
"TRXPE"
,
"TRXPV"
,
"TRXIT"
,
"TRXNMB"
,
"TRXAC"
,
"TRXINA"
,
"TRXIS"
,
"TRXIM"
,
"TRXVC"
,
"TRXVD"
,
"TRXZN"
,
"CSXCOT"
,
"CSXCTR"
,
"SMXASAH"
,
"SMXFEA"
,
"SMXCOA"
,
"SMXSTA"
,
"SMXSBA"
,
"SMXRNA"
,
"SMXMYA"
,
"SMXARA"
,
"SMXCPA"
,
"SMXAPA"
,
"SMXINA"
,
"SMXNAA"
,
"SMXDIA"
,
"SMXFAA"
,
"SMXHEA"
,
"SMXCNA"
,
"SMXACA"
,
"SMXSLA"
,
"SMXTLA"
,
"SMXSYA"
,
"SMXWHA"
,
"SMXLYA"
,
"SMXANA"
,
"SMXIWA"
,
"SMXSRA"
,
"SMXBLA"
,
"CMXPRG"
,
"CMXCVD"
,
"CMXCMP"
,
"CMXHT"
,
"CMXDI"
,
"CMXCKD"
,
"CMXCLD"
,
"CMXCPD"
,
"CMXASM"
,
"CMXCND"
,
"CMXRHE"
,
"CMXCCI"
,
"CMXCBD"
,
"CMXDE"
,
"CMXPU"
,
"CMXST"
,
"CMXLY"
,
"CMXAP"
,
"RFXSM"
,
"RFXFSM"
,
"RFXOB"
,
"RFXTB"
,
"RFXIMD"
,
"RFXHIV"
,
"RFXAIDS"
,
"RFXUI"
,
"RFXHC"
,
"RFXONC"
,
"RFXMN"
,
"HMRACI"
,
"HMRARB"
,
"HMRAHO"
,
"HMRNS"
,
"HMROS"
,
"HMRCS"
,
"HMRIS"
,
"HMRAV"
,
"HMRAB"
,
"HMRCOV"
,
"IMDXCT"
,
"IMDXCTCR"
,
"IMDXCTTE"
,
"IMDXCTAB"
,
"IMDXXR"
,
"IMDXPN"
,
"COXRD"
,
"COXAR"
,
"COXPM"
,
"COXMOD"
,
"COXPT"
,
"COXEC"
,
"COXSH"
,
"COXIO"
,
"COXPE"
,
"COXST"
,
"COXDIC"
,
"COXRIO"
,
"COXKF"
,
"COXHF"
,
"COXBC"
)
#----------------------------------------------------------------------------
#Test if column names are valid
...
...
@@ -132,8 +129,8 @@ is_number <- function(x){
check_values_format
<-
function
(
valid_columns
,
codebook_param
){
res
<-
""
variables_out_of_range
=
"Variables out of range:"
#
for(i in 1:length(valid_columns[[1]])){
for
(
i
in
1
:
9
){
for
(
i
in
1
:
length
(
valid_columns
[[
1
]])){
#for(i in 1:1
){
current_column
<-
valid_columns
[[
1
]][[
i
]]
print
(
current_column
)
variable_type
<-
codebook_param
$
Variable.type
[
codebook
$
Harmonised.variable.name
==
current_column
]
...
...
@@ -150,12 +147,13 @@ check_values_format <- function(valid_columns, codebook_param){
### parse del formato de una variable continua ##
## esta sentencia funciona codebook$Possible.values.format[codebook$Harmonised.variable.name == "CMXDE"] pruebala en el interprete.
value_format
<-
strsplit
(
codebook_param
$
Possible.values.format
[
codebook_param
$
Harmonised.variable.name
==
current_column
],
" / "
)[[
1
]]
high_limit
<-
str_trim
(
gsub
(
","
,
"."
,
(
sub
(
"-.*"
,
""
,
value_format
[
1
]))))
low_limit
<-
str_trim
(
gsub
(
","
,
"."
,
(
sub
(
".*-"
,
""
,
value_format
[
1
]))))
possible_values
<-
codebook_param
$
Possible.values.format
[
codebook_param
$
Harmonised.variable.name
==
current_column
]
value_format
<-
strsplit
(
possible_values
,
" / "
)[[
1
]]
low_limit
<-
str_trim
(
gsub
(
","
,
"."
,
(
sub
(
"-.*"
,
""
,
value_format
[
1
]))))
high_limit
<-
str_trim
(
gsub
(
","
,
"."
,
(
sub
(
".*-"
,
""
,
value_format
[
1
]))))
if
(
low_limit
==
""
){
high
_limit
<-
str_trim
(
sub
(
",.*"
,
""
,
value_format
[
1
]))
low
_limit
<-
str_trim
(
strtrimsub
(
".*,"
,
""
,
value_format
[
1
]))
low
_limit
<-
str_trim
(
sub
(
",.*"
,
""
,
value_format
[
1
]))
high
_limit
<-
str_trim
(
strtrimsub
(
".*,"
,
""
,
value_format
[
1
]))
}
### parse del formato de una variable continua ##
...
...
@@ -169,6 +167,7 @@ check_values_format <- function(valid_columns, codebook_param){
res
<-
c
(
res
,
error
)
},
{
print
(
paste
(
"Higher Limit: "
,
high_limit
))
ds.dataFrameSubset
(
df.name
=
"data"
,
V1.name
=
paste
(
"data$"
,
current_column
,
sep
=
""
),
V2.name
=
high_limit
,
...
...
@@ -177,7 +176,7 @@ check_values_format <- function(valid_columns, codebook_param){
keep.NAs
=
TRUE
,
datasources
=
connections
)
print
(
paste
(
"Lower Limit: "
,
low_limit
))
ds.dataFrameSubset
(
df.name
=
"inRangeHigh"
,
V1.name
=
paste
(
"inRangeHigh$"
,
current_column
,
sep
=
""
),
V2.name
=
low_limit
,
...
...
@@ -190,13 +189,18 @@ check_values_format <- function(valid_columns, codebook_param){
if
(
ds.length
(
paste
(
"data$"
,
current_column
,
sep
=
""
))[[
1
]]
>
summary
[[
1
]][[
2
]]){
variables_out_of_range
<-
paste
(
variables_out_of_range
,
current_column
,
sep
=
" "
)
print
(
paste
(
current_column
,
"does not follow the established format"
,
sep
=
" "
))
print
(
paste
(
"It should follow the following format: "
,
possible_values
))
}
else
{
print
(
paste
(
paste
(
"Data in: "
,
current_column
),
" was valid"
))
}
}
)
################## FIN ESTO PODRÍA IR EN UNA FUNC DIFERENTE #############
}
else
if
(
variable_type
==
"Categorical"
||
variable_type
==
"Binary"
){
value_format
<-
lapply
(
strsplit
(
mierda
,
"/"
)
,
str_trim
)[[
1
]]
possible_values
<-
codebook_param
$
Possible.values.format
[
codebook_param
$
Harmonised.variable.name
==
current_column
]
value_format
<-
lapply
(
strsplit
(
possible_values
,
"/"
)
,
str_trim
)[[
1
]]
tryCatch
(
error
=
function
(
cnd
)
{
if
(
grepl
(
"list them with datashield.errors()"
,
cnd
))
...
...
@@ -209,14 +213,22 @@ check_values_format <- function(valid_columns, codebook_param){
{
contingency_table
<-
ds.table
(
paste
(
"data$"
,
current_column
,
sep
=
""
))
row_names
<-
rownames
(
contingency_table
[[
1
]][[
3
]])
result
<-
FALSE
for
(
i
in
1
:
length
(
row_names
))
{
if
(
row_names
[
i
]
==
"NA"
)
next
if
(
!
row_names
[
i
]
%in%
value_format
){
variables_out_of_range
<-
paste
(
variables_out_of_range
,
current_column
,
sep
=
" "
)
print
(
paste
(
current_column
,
"does not follow the established format"
,
sep
=
" "
))
print
(
paste
(
"It should follow the following format:"
,
possible_values
))
print
(
paste
(
"Instead of:"
,
paste
(
row_names
,
collapse
=
" "
)))
result
<-
TRUE
break
}
}
if
(
!
result
){
print
(
paste
(
paste
(
"Data in: "
,
current_column
),
" was valid"
))
}
}
)
}
...
...
@@ -232,28 +244,28 @@ inp <- auxConnections[[2]]
#Conexión a la base de datos
#
ds.dim("data", datasources = connections)
#
colnames <- ds.colnames("data")
#
colnames
#
ds.dim
(
"data"
,
datasources
=
connections
)
colnames
<-
ds.colnames
(
"data"
)
colnames
# ds.dataFrameSubset(df.name = "data",
# V1.name = "data$DM
XBMI
",
# V2.name = "1
3
0",
# V1.name = "data$DM
RAGEYR
",
# V2.name = "1
5
0",
# Boolean.operator = "<=",
# newobj = "inRangeHigh",
# keep.NAs = TRUE,
# datasources = connections)
#
# ds.dataFrameSubset(df.name = "inRangeHigh",
# V1.name = "inRangeHigh$DM
XBMI
",
# V2.name = "
11
",
# V1.name = "inRangeHigh$DM
RAGEYR
",
# V2.name = "
0
",
# Boolean.operator = ">=",
# newobj = "inRange",
# keep.NAs = TRUE,
# datasources = connections)
#
# summary <- ds.summary("inRange$DM
XBMI
")
# if(ds.length("data$DM
XBMI
")[[1]] > summary[[1]][[2]]){
# summary <- ds.summary("inRange$DM
RAGEYR
")
# if(ds.length("data$DM
RAGEYR
")[[1]] > summary[[1]][[2]]){
# res <- c(res, paste(current_column, "does not follow the established format" , sep="\n"))
# }
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment