Uncover / Harmonize_Scripts · Commits

Commit 52f4db3d, authored Feb 14, 2023 by Pepe Márquez Romero
Starting to modify the remote script
parent 011cb357
Showing 3 changed files with 333 additions and 356 deletions (+333 -356)
connection_parameters.R            +206 -206
valid_variables_script.R           +123 -146
valid_variables_script_local.R       +4   -4
connection_parameters.R (view file @ 52f4db3d)
The diff rewrites the whole file; the only content change between the removed and the added version is the HM resource name: "20220720_HarmonisedHM" → "20220919_FiHM". The new contents:

hospital_names <- c( # Add the ones from Baskent and Sacrocuore
  "Princesa", "CIPH", "UMF_Iasis", "SMUC", "HM", "Porto", "FJD", "Coimbra", "UNAV", "TU",
  "Ankara Impatient", "Konya Impatient", "Istambul Impatient", "Izmir Impatient",
  "Alanya Impatient", "Adana Impatient", "Ankara Outpatient", "Konya Outpatient",
  "Istambul Outpatient", "Izmir Outpatient", "Alanya Outpatient",
  "Sacrocuore Emergency", "Sacrocuore Employees", "Sacrocuore Verona", "Sacrocuore Isaric",
  "TUDublin", "UMF_Cluj", "UdeA", "Inantro", "UNSA", "UZA")

project_names <- c(
  "FIBHULP", "CIPH_unCoVer", "umfiasi", "SMUC", "FiHM", "uncover-up", "IISFJD", "IPC",
  "unCOVer-UNAV", "TU_Uncover",
  "BU", "BU", "BU", "BU", "BU", "BU", "BU", "BU", "BU", "BU", "BU",
  "S_uncover", "S_uncover", "S_uncover", "S_uncover",
  "TUDublin", "UMF_Cluj", "INS_Data", "INANTRO", "UnCoVer-BiH-Final", "UZA")

resource_names <- c(
  "Harmonized_variables_2", "CIPH_numeric_derivated", "20220719_HarmonisedUMFIasi",
  "SMUC_resource", "20220919_FiHM", "Resource_derived", "IISFJD_Harmonized_1",
  "IPC_Harmonized", "UNAV_rsc", "TU_Harmonized",
  "inpatient_ankara", "inpatient_konya", "inpatient_istanbul", "inpatient_izmir",
  "inpatient_alanya", "inpatient_adana", "outpatient_ankara", "outpatient_konya",
  "outpatient_istanbul", "outpatient_izmir", "outpatient_alanya",
  "emergency", "employees", "verona", "isaric",
  "TUDublin_harmonised", "Romania", "colombia_all", "Inantro", "20220722_HarmonizedUNSA",
  "UZA_prelim")

urls <- c(
  "https://192.168.1.200:8001", "https://192.168.1.200:8002", "https://192.168.1.200:8003",
  "https://192.168.1.200:8006", "https://192.168.1.50:9002", "https://192.168.1.102",
  "https://uncover.itg.be", "https://uncover.itg.be", "https://192.168.1.50:9001",
  "https://192.168.1.200:8004",
  "https://192.168.1.101:8443", "https://192.168.1.101:8443", "https://192.168.1.101:8443",
  "https://192.168.1.101:8443", "https://192.168.1.101:8443", "https://192.168.1.101:8443",
  "https://192.168.1.101:8443", "https://192.168.1.101:8443", "https://192.168.1.101:8443",
  "https://192.168.1.101:8443", "https://192.168.1.101:8443",
  "https://192.168.1.50:8890", "https://192.168.1.50:8890", "https://192.168.1.50:8890",
  "https://192.168.1.50:8890",
  "https://uncover.itg.be", "https://192.168.1.200:8005", "https://fenfisdi.udea.edu.co/opal",
  "https://192.168.1.200:8007", "https://192.168.1.200:8008", "https://uncover.itg.be")

users <- c(
  "user_analisis", "user_analisis", "user_analisis", "user_analisis", "user_analisis",
  "user_analisis", "emertens", "emertens", "user_analisis", "user_analisis",
  "user_analisis", "user_analisis", "user_analisis", "user_analisis", "user_analisis",
  "user_analisis", "user_analisis", "user_analisis", "user_analisis", "user_analisis",
  "user_analisis", "user_analisis", "user_analisis", "user_analisis", "user_analisis",
  "emertens", "user_analisis", "user_analisis", "user_analisis", "user_analisis",
  "emertens")

pass <- c(
  "Ekfl07UUgz", "Ekfl07UUgz", "Ekfl07UUgz", "Ekfl07UUgz", "Ekfl07UUgz", "Ekfl07UUgz",
  "3^z4AV.)hG5~PT/]", "3^z4AV.)hG5~PT/]", "Ekfl07UUgz", "Ekfl07UUgz", "Ekfl07UUgz",
  "Ekfl07UUgz", "Ekfl07UUgz", "Ekfl07UUgz", "Ekfl07UUgz", "Ekfl07UUgz", "Ekfl07UUgz",
  "Ekfl07UUgz", "Ekfl07UUgz", "Ekfl07UUgz", "Ekfl07UUgz", "Ekfl07UUgz", "Ekfl07UUgz",
  "Ekfl07UUgz", "Ekfl07UUgz", "3^z4AV.)hG5~PT/]", "Ekfl07UUgz", "Ekfl07UUgz", "Ekfl07UUgz",
  "Ekfl07UUgz", "3^z4AV.)hG5~PT/]")
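For context, necessary_functions_connection.R (not part of this diff) presumably consumes these parallel vectors to open one DataSHIELD session per site. A minimal sketch of how such vectors are typically fed to DSI/DSOpal; the loop, the "project.resource" naming, and the connection symbol are illustrative assumptions, not the repository's actual code:

library(DSI)
library(DSOpal)

# Build one login entry per site from the parallel vectors defined above.
builder <- DSI::newDSLoginBuilder()
for (i in seq_along(hospital_names)) {
  builder$append(server   = hospital_names[i],
                 url      = urls[i],
                 user     = users[i],
                 password = pass[i],
                 # Assumed mapping of project/resource names to Opal's "project.resource" form.
                 resource = paste0(project_names[i], ".", resource_names[i]),
                 driver   = "OpalDriver")
}
logindata <- builder$build()

# Open a DataSHIELD session on every server listed in the login data.
connections <- DSI::datashield.login(logins = logindata)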
valid_variables_script.R (view file @ 52f4db3d)

(The diff below interleaves removed and added lines, so old and new variants of several statements appear back to back.)
rm(list = ls())
setwd("C:/Users/Victor/Documents/TFG/r-analytics-master")
dir_name <- readline("Introduce the name of the directory please: ")
source("required_folder_checker.R")
source("argument_hasher.R")
source("dependency_installer.R")
setwd(dir_name)
# install.packages("https://cran.r-project.org/src/contrib/Archive/DSI/DSI_1.2.0.tar.gz", repos=NULL, type="source")
# install.packages("https://cran.r-project.org/src/contrib/Archive/DSOpal/DSOpal_1.2.0.tar.gz", repos=NULL, type="source")
# install.packages("https://cran.r-project.org/src/contrib/Archive/DSLite/DSLite_1.2.0.tar.gz", repos=NULL, type="source")
# install.packages("https://cran.r-project.org/src/contrib/Archive/opalr/opalr_2.1.0.tar.gz", repos=NULL, type="source")
source("dependency_installer.R")
source("connection_parameters.R")
source("necessary_functions_connection.R")
#source("required_folder_checker.R")
#source("argument_hasher.R")
dep_list = c("jsonlite", "stringr", "DSI", "DSOpal", "DSLite", "fields", "metafor",
             "ggplot2", "gridExtra", "data.table", "dsBaseClient")
dep_list = c("jsonlite", "stringr", "DSI", "DSOpal", "DSLite", "fields", "metafor",
             "ggplot2", "gridExtra", "data.table", "dsBaseClient", "openxlsx")
install_dependencies(dep_list)
#,"DSI","DSOpal","DSLite"
codebook_file <- "20220315_Data Harmonisation.xlsb.xlsx"
setwd("C:/Users/victor/Documents/TFG/r-analytics-master")
source("connection_parameters.R")
source("necessary_functions_connection.R")
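install_dependencies(), called a few lines above, comes from dependency_installer.R, which is not shown in this diff. A plausible minimal sketch of such a helper, assuming it simply installs whatever is missing from dep_list and attaches each package (the real helper may well differ, for instance in how it reaches the DataSHIELD repository):

# Hypothetical sketch of the helper defined in dependency_installer.R.
install_dependencies <- function(dep_list) {
  for (dep in dep_list) {
    # Install only if the package is not already available.
    if (!requireNamespace(dep, quietly = TRUE)) {
      # dsBaseClient is distributed via the OBiBa/DataSHIELD repository rather than CRAN,
      # so the real helper likely passes extra repos to install.packages().
      install.packages(dep)
    }
    library(dep, character.only = TRUE)
  }
}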
codebook_demo <- read.xlsx(codebook_file, sheet = 2)
codebook_com_and_rf <- read.xlsx(codebook_file, sheet = 3)
codebook_home_med <- read.xlsx(codebook_file, sheet = 4)
codebook_si_sympt <- read.xlsx(codebook_file, sheet = 5)
setwd("C:/Users/Victor/Documents/TFG/r-analytics-master/harmonised_data")
codebook_treatments <- read.xlsx(codebook_file, sheet = 6)
codebook_labo <- read.xlsx(codebook_file, sheet = 7)
codebook_complications <- read.xlsx(codebook_file, sheet = 8)
codebook_imaging_data <- read.xlsx(codebook_file, sheet = 9)
ComAndRF <- data.frame(read.csv("Com&RF.csv", sep = ","))[1:64, 1:5]
Complications <- data.frame(read.csv("Complications.csv", sep = ";"))[1:20, 1:5]
Dates <- data.frame(read.csv("Dates.csv", sep = ";"))[1:12, 1:5]
Demographics <- data.frame(read.csv("Demographics.csv", sep = ";"))[1:9, 1:5]
Home_med <- data.frame(read.csv("Home_med.csv", sep = ";"))[1:13, 1:5]
Imaging_data <- data.frame(read.csv("Imaging_data.csv", sep = ";"))[1:11, 1:5]
Labo <- data.frame(read.csv("Labo.csv", sep = ";"))[1:143, 1:5]
SiAndSympt <- data.frame(read.csv("Si&Sympt.csv", sep = ";"))[1:50, 1:5]
Treatment <- data.frame(read.csv("Treatment.csv", sep = ";"))[1:32, 1:5]
LifestyleAndDiet <- data.frame(read.csv("Lifestyle&Diet.csv", sep = ";"))[1:165, 1:5]
codebook_lifestyle_diet <- read.xlsx(codebook_file, sheet = 10)
codebook_dates <- read.xlsx(codebook_file, sheet = 11)
codebook <- rbind(codebook_demo, codebook_com_and_rf)
codebook <- rbind(codebook, codebook_home_med)
codebook <- rbind(codebook, codebook_si_sympt)
codebook <- rbind(codebook, codebook_treatments)
codebook <- rbind(codebook, codebook_labo)
codebook <- rbind(codebook, codebook_complications)
codebook <- rbind(codebook, codebook_imaging_data)
codebook_lifestyle_diet <- codebook_lifestyle_diet[, !names(codebook_lifestyle_diet) %in% c("X2", "X4", "X10")]
codebook <- rbind(codebook, codebook_lifestyle_diet)
codebook <- rbind(codebook, codebook_dates)
harmonised_data <- rbind(SiAndSympt, ComAndRF)
harmonised_data <- rbind(harmonised_data, Treatment)
harmonised_data <- rbind(harmonised_data, Dates)
harmonised_data <- rbind(harmonised_data, Demographics)
harmonised_data <- rbind(harmonised_data, Home_med)
harmonised_data <- rbind(harmonised_data, Imaging_data)
harmonised_data <- rbind(harmonised_data, Complications)
harmonised_data <- rbind(harmonised_data, Labo)
harmonised_data <- rbind(harmonised_data, LifestyleAndDiet)
rm(list = c("SiAndSympt", "Complications", "ComAndRF", "Dates", "Demographics",
            "Home_med", "Imaging_data", "Complications", "Labo", "LifestyleAndDiet"))
codebook_col_names <- as.data.frame(codebook$Harmonised.variable.name)
names(codebook_col_names) <- c("col_names")
categoric_vars = c("DMRGENDR", "DMRBORN", "DMRRETH1", "DMROCCU", "DMRHREDU", "DSXOS", "DSXHO", "DSXIC",
                   "TRXAV", "TRXRIB", "TRXLR", "TRXRM", "TRXIA", "TRXIB", "TRXCH", "TRXAB", "TRXCS",
                   "TRXHEP", "TRXAF", "TRXCP", "TRXOT", "TRXECM", "TRXIV", "TRXNIV", "TRXNO", "TRXOX",
                   "TRXRR", "TRXTR", "TRXVA", "TRXPE", "TRXPV", "TRXIT", "TRXNMB", "TRXAC", "TRXINA",
                   "TRXIS", "TRXIM", "TRXVC", "TRXVD", "TRXZN", "CSXCOT", "CSXCTR",
                   "SMXASAH", "SMXFEA", "SMXCOA", "SMXSTA", "SMXSBA", "SMXRNA", "SMXMYA", "SMXARA",
                   "SMXCPA", "SMXAPA", "SMXINA", "SMXNAA", "SMXDIA", "SMXFAA", "SMXHEA", "SMXCNA",
                   "SMXACA", "SMXSLA", "SMXTLA", "SMXSYA", "SMXWHA", "SMXLYA", "SMXANA", "SMXIWA",
                   "SMXSRA", "SMXBLA",
                   "CMXPRG", "CMXCVD", "CMXCMP", "CMXHT", "CMXDI", "CMXCKD", "CMXCLD", "CMXCPD",
                   "CMXASM", "CMXCND", "CMXRHE", "CMXCCI", "CMXCBD", "CMXDE", "CMXPU", "CMXST",
                   "CMXLY", "CMXAP",
                   "RFXSM", "RFXFSM", "RFXOB", "RFXTB", "RFXIMD", "RFXHIV", "RFXAIDS", "RFXUI",
                   "RFXHC", "RFXONC", "RFXMN",
                   "HMRACI", "HMRARB", "HMRAHO", "HMRNS", "HMROS", "HMRCS", "HMRIS", "HMRAV",
                   "HMRAB", "HMRCOV",
                   "IMDXCT", "IMDXCTCR", "IMDXCTTE", "IMDXCTAB", "IMDXXR", "IMDXPN",
                   "COXRD", "COXAR", "COXPM", "COXMOD", "COXPT", "COXEC", "COXSH", "COXIO",
                   "COXPE", "COXST", "COXDIC", "COXRIO", "COXKF", "COXHF", "COXBC")
...
@@ -65,48 +54,41 @@ categoric_vars = c("DMRGENDR", "DMRBORN", "DMRRETH1", "DMROCCU", "DMRHREDU", "DS
#----------------------------------------------------------------------------
#Test if column names are valid
check_column_names <- function(x){
check_column_names <- function(codebook_param, colnames){
  str_res <- "The column names:"
  for (i in 1:(nrow(data_colnames))){
    if (!check_valid_name(data_colnames[i,1])){
      str_res <- paste(str_res, data_colnames[i,1], sep = " ")
  valid_colnames <- c()
  for (i in 1:(nrow(colnames))){
    colname <- colnames[i,1]
    number_of_column <- check_valid_name(colname, colnames)
    if (number_of_column != 1){
      str_res <- paste(str_res, colname, sep = " ")
    }
    else {
      valid_colnames <- c(valid_colnames, colname)
    }
  }
  str_res <- paste(str_res, "are not registered in the harmonized data codebook \n", sep = " ")
  return(str_res)
  result <- list("not_colnames" = str_res, "colnames" = valid_colnames)
  return(result)
}
#Test if a single variable name is valid
check_valid_name <- function(x){
  valid <- FALSE
  aux <- as.data.frame(strsplit(x, split = "_"))
  if (aux[1,1] %in% harmonised_data$Harmonised.variable.name)
    valid <- TRUE
  return(valid)
}
valid_data_colnames <- function(x){
check_valid_name <- function(col_name, col_names){
  valid_colnames = c()
  valid <- 0
  for (i in 1:(nrow(data_colnames))){
    if (check_valid_name(data_colnames[i,1])){
      valid_colnames = c(valid_colnames, data_colnames[i,1])
    }
  if (col_name %in% codebook_col_names$col_names){
    valid <- length(grep(col_name, col_names))
  }
  return(valid)
  return(valid_colnames)
}
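One behavioural detail of the two-argument check_valid_name() variant: it returns 0 for names missing from the codebook and otherwise the number of data columns whose names contain col_name (via grep), and check_column_names() treats anything other than exactly one match as unregistered. A small illustration (the vector below is made up, though the identifiers appear in the script):

col_names <- c("DMRGENDR", "DMRGENDR_numeric", "RFXSM")  # hypothetical data column names
length(grep("RFXSM", col_names))     # 1 -> exactly one match, kept as valid
length(grep("DMRGENDR", col_names))  # 2 -> also matches the "_numeric" twin, so the count is not 1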
remove_space <- function(x){
...
@@ -132,7 +114,7 @@ is_number <- function(x){
  x <- str_replace(x, ",", ".")
  aux <- as.numeric(x)
  if (!is.na(aux))
    res <- TRUE
...
@@ -163,7 +145,7 @@ check_values_not_categoric <- function(values, colname){
  if (is.null(value)){
    res <- TRUE
  }
  else if (value == "NA" | value == "nan" | value == ".")
    res <- TRUE
  else {
...
@@ -410,7 +392,7 @@ error_message <- function(colname, invalid_values){
}
check_valid_values <- function(){
check_valid_values <- function(valid_colnames){
  invalid_name_list <- c()
  cannot_analyse_list <- c()
...
@@ -423,84 +405,82 @@ check_valid_values <- function(){
  for (i in 1:(nrow(valid_colnames))){
    data_table = "empty"
    data_table <- "empty"
    colname <- valid_colnames[i,1]
    if (!grepl("DMRBORN", valid_colnames[i,1], fixed = TRUE) & (!grepl("DAT", valid_colnames[i,1], fixed = TRUE)) &
        (!grepl("ISO", valid_colnames[i,1], fixed = TRUE)) & (!grepl("BEF", valid_colnames[i,1], fixed = TRUE))){
    if (grepl("DMRBORN", colname, fixed = TRUE) | (grepl("DAT", colname, fixed = TRUE)) |
        (grepl("ISO", colname, fixed = TRUE)) | (grepl("BEF", colname, fixed = TRUE))){
      next
    }
    column <- "data$"
    column <- paste(column, colname, sep = "")
    tryCatch(error = function(cnd) {
        print("Unable to analyse data")
        res <- FALSE
      },
      data_table <- as.data.frame(ds.table(column))
    )
    if (data_table == "empty"){
      column <- "data$"
      column <- paste(column, valid_colnames[i,1], sep = "")
      cannot_analyse_list <- c(cannot_analyse_list, colname)
      tryCatch(error = function(cnd) {
          print("Unable to analyse data")
          res <- FALSE
        },
        data_table <- as.data.frame(ds.table(column))
      )
    }
    else {
      if (data_table == "empty"){
        cannot_analyse_list <- c(cannot_analyse_list, valid_colnames[i,1])
      }
      else {
        if (data_table[[1]] == "All studies failed for reasons identified below")
          values <- get_values_from_quantiles(column)
        else
          values <- row.names(data_table)
        numeric_col <- paste(colname, "_numeric", sep = "")
        if (colname %in% categoric_vars){
          has_numeric <- numeric_col %in% valid_colnames$`valid_data_colnames(data_colnames)`
          if (data_table[[1]] == "All studies failed for reasons identified below")
            values <- get_values_from_quantiles(column)
          else
            values <- row.names(data_table)
          if (!has_numeric)
            missing_numeric <- c(missing_numeric, colname)
          numeric_col <- paste(valid_colnames[i,1], "_numeric", sep = "")
          if (valid_colnames[i,1] %in% categoric_vars){
            #is_numeric <- grepl("numeric",valid_colnames[i,1], fixed=TRUE)
            has_numeric <- numeric_col %in% valid_colnames$`valid_data_colnames(data_colnames)`
            if (!has_numeric)
              missing_numeric <- c(missing_numeric, valid_colnames[i,1])
            if (data_table[[1]] == "All studies failed for reasons identified below"){
              cannot_analyse_list <- c(cannot_analyse_list, valid_colnames[i,1])
            }
            else if (!check_values_categoric(values, valid_colnames[i,1])){
              print("Wrong categoric value:")
              print(valid_colnames[i,1])
              wrong_categoric <- c(wrong_categoric, valid_colnames[i,1])
              wrong_categoric_values[[k]] <- values
              k <- k + 1
            }
            # if((!is_numeric & !has_numeric) | is_numeric)
          }
          else {
            if (grepl("numeric", valid_colnames[i,1], fixed = TRUE))
              new_colname <- strsplit(x = valid_colnames[i,1], split = "_")[[1]][1]
            else
              new_colname <- valid_colnames[i,1]
            if (data_table[[1]] == "All studies failed for reasons identified below"){
              valid <- check_values_not_categoric(values, new_colname)
              cannot_analyse_list <- c(cannot_analyse_list, colname)
              if (FALSE %in% valid){
                invalid_name_list <- c(invalid_name_list, valid_colnames[i,1])
                invalid_values_list[[j]] <- values
                j <- j + 1
              }
            }
            else if (!check_values_categoric(values, colname)){
              print("Wrong categoric value:")
              print(colname)
              #print(valid_colnames[i,1])
              #print(values)
            }
            #else
            #  print("This variable has a numeric version")
          }
          wrong_categoric <- c(wrong_categoric, colname)
          wrong_categoric_values[[k]] <- values
          k <- k + 1
        }
      }
      else {
        if (grepl("numeric", colname, fixed = TRUE))
          new_colname <- strsplit(x = colname, split = "_")[[1]][1]
        else
          new_colname <- colname
        valid <- check_values_not_categoric(values, new_colname)
        if (FALSE %in% valid){
          invalid_name_list <- c(invalid_name_list, colname)
          invalid_values_list[[j]] <- values
          j <- j + 1
        }
        #print(colname)
        #print(values)
      }
      #else
      #  print("This variable has a numeric version")
    }
  }
  missing_numeric
...
@@ -515,7 +495,7 @@ check_valid_values <- function(){
  res <- paste(res, notify_unable_analyse(cannot_analyse_list), sep = "\n")
}
...
@@ -553,15 +533,12 @@ data_colnames <- ds.colnames(x=datastructure_name, datasources= connections)
data_colnames <- as.data.frame(data_colnames)
check_valid_columns <- check_column_names(data_colnames)
check_valid_columns <- check_column_names(codebook, data_colnames)
valid_columns <- as.data.frame(check_valid_columns$colnames)
valid_colnames <- as.data.frame(valid_data_colnames(data_colnames))
#possible_values("CSXCTR")
result <- ""
result <- check_valid_values()
result <- check_valid_values(valid_columns)
print(check_valid_columns)
datashield.logout(connections)
cat(result)
...
@@ -588,5 +565,5 @@ cat(check_valid_columns,file=file_name,sep="\n")
cat(result, file = file_name, append = TRUE)
datashield.logout(connections)
valid_variables_script_local.R (view file @ 52f4db3d)
...
@@ -120,8 +120,8 @@ check_valid_values_continuous <- function(colname , codebook_param , column){
    str_res <- "No failing values"
  else {
    failing_values <- failing_values[!is.na(failing_values)]
    #str_res <- paste( colname , paste(unlist(failing_values) , collapse =" "))
    str_res <- paste(colname, collapse = " ")
    str_res <- paste(colname, paste(unlist(failing_values), collapse = " "))
    #str_res <- paste(colname , collapse =" ")
    str_res <- paste(str_res, "should be in range", range_as_str, "(continuous)", sep = " ")
  }
...
@@ -140,8 +140,8 @@ check_valid_values_binary <- function(colname , column){
  else {
    range_as_str <- "0-1 (binary)"
    failing_values <- failing_values[!is.na(failing_values)]
    #str_res <- paste(colname , paste(unlist(failing_values) , collapse =" "))
    str_res <- paste(colname, collapse = " ")
    str_res <- paste(colname, paste(unlist(failing_values), collapse = " "))
    #str_res <- paste(colname , collapse =" ")
    str_res <- paste(str_res, "should be in range", range_as_str, sep = " ")
  }
...
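Both hunks in this file appear to swap which of the two paste() calls is active, so the message now includes the offending values rather than only the column name. A quick illustration with hypothetical values (not from the repository):

colname        <- "LBXCRP"      # hypothetical column name
failing_values <- c(-3, 9999)   # hypothetical out-of-range values

paste(colname, collapse = " ")                                 # "LBXCRP"
paste(colname, paste(unlist(failing_values), collapse = " "))  # "LBXCRP -3 9999"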