Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Sign in
Toggle navigation
H
Harmonize_Scripts
Project overview
Project overview
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Uncover
Harmonize_Scripts
Commits
6d57f049
Commit
6d57f049
authored
Feb 02, 2023
by
pxp9
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
pequeña mejora en la sustitucion de Yes/No
parent
3570d2a0
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
285 additions
and
280 deletions
+285
-280
ressourceCleaner.R
ressourceCleaner.R
+285
-280
No files found.
ressourceCleaner.R
View file @
6d57f049
# ---- Load the raw hospital export and the harmonisation dictionary ----
#
# Produces three globals used by the rest of the script:
#   hospital        - the raw export, minus every "numeric" column
#   harmonised_data - all dictionary sheets stacked into one data frame
#   noYesValues     - names of the variables whose cells must be No/Yes

# Start from an empty global environment.
# NOTE(review): this also wipes anything else held by the interactive session.
rm(list = ls())

# Folder holding the raw hospital export. The earlier setwd() to the parent
# "ressources" folder was overridden immediately, so it has been dropped.
setwd("C:/Users/Victor/Documents/TFG/r-analytics-master/ressources/new_res_baskent/outpatient")

# Change this to the name of the resource file that should be cleaned.
hospital <- read.csv("konya_outpatient.csv", sep = ",")

# Drop every column whose name contains "numeric".
# NOTE(review): `%>%`, select() and contains() are not loaded anywhere in the
# visible script - presumably dplyr is attached beforehand; confirm.
hospital <- hospital %>%
  select(-contains("numeric"))

# Disabled alternative that removed the NOT.HARMONISED columns and the first
# data row; kept for reference.
# hospital["NOT.HARMONISED"] <- NULL
#
# names <- colnames(hospital)
# for (i in 1:length(names)){
#
#   if(grepl("NOT.HARMONISED", names[i])){
#     hospital[names[i]] <- NULL
#     print(paste("quito ", names[i]))
#   }
#
# }
# hospital <- hospital[-1,]

setwd("C:/Users/Victor/Documents/TFG/r-analytics-master/harmonised_data")

# One entry per dictionary sheet, in the order the sheets are stacked:
# file name, field separator, and the number of leading rows that are kept.
dictionary_files <- list(
  list(file = "Si&Sympt.csv", sep = ";", rows = 50),
  list(file = "Com&RF.csv", sep = ",", rows = 64),
  list(file = "Treatment.csv", sep = ";", rows = 32),
  list(file = "Dates.csv", sep = ";", rows = 12),
  list(file = "Demographics.csv", sep = ";", rows = 9),
  list(file = "Home_med.csv", sep = ";", rows = 13),
  list(file = "Imaging_data.csv", sep = ";", rows = 11),
  list(file = "Complications.csv", sep = ";", rows = 20),
  list(file = "Labo.csv", sep = ";", rows = 143),
  list(file = "Lifestyle&Diet.csv", sep = ";", rows = 165)
)

# Read the first five columns of each sheet and stack them in a single rbind()
# call. No per-sheet globals are created, so nothing has to be removed
# afterwards (the old clean-up list named "Complications" twice and never
# removed `Treatment`).
harmonised_data <- do.call(
  rbind,
  lapply(dictionary_files, function(spec) {
    read.csv(spec[["file"]], sep = spec[["sep"]])[seq_len(spec[["rows"]]), 1:5]
  })
)
rm(dictionary_files)

# Variables the dictionary declares as No/Yes (either spelling of "missing"),
# followed by the extra variable names that are added by hand.
noYesValues <- harmonised_data$Harmonised.variable.name[
  harmonised_data$Harmonised.data.format.unit %in%
    c("No/Yes / missing", "No/Yes / Missing")
]
noYesValues <- c(
  noYesValues,
  "CSXCOTAB", "CSXCOTAG", "IMDIT", "RFXHIV_RFXAIDS",
  "SMXASAH", "CMXATH", "CMXNO"
)
# Column names that are treated as categorical: the cleaning loop never sends
# these through the numeric clean-up. Each name appears once ("SMXASAH" used to
# be listed twice).
categoric_vars <- c(
  # hand-added variables
  "CMXATH", "CMXNO", "SMXASAH", "CSXCOTAB", "CSXCOTAG", "IMDIT",
  "RFXHIV_RFXAIDS",
  # DMR*
  "DMRGENDR", "DMRBORN", "DMRRETH1", "DMROCCU", "DMRHREDU",
  # DSX*
  "DSXOS", "DSXHO", "DSXIC",
  # TRX*
  "TRXAV", "TRXRIB", "TRXLR", "TRXRM", "TRXIA", "TRXIB", "TRXCH", "TRXAB",
  "TRXCS", "TRXHEP", "TRXAF", "TRXCP", "TRXOT", "TRXECM", "TRXIV", "TRXNIV",
  "TRXNO", "TRXOX", "TRXRR", "TRXTR", "TRXVA", "TRXPE", "TRXPV", "TRXIT",
  "TRXNMB", "TRXAC", "TRXINA", "TRXIS", "TRXIM", "TRXVC", "TRXVD", "TRXZN",
  # CSX*
  "CSXCOT", "CSXCTR",
  # SMX*
  "SMXFEA", "SMXCOA", "SMXSTA", "SMXSBA", "SMXRNA", "SMXMYA", "SMXARA",
  "SMXCPA", "SMXAPA", "SMXINA", "SMXNAA", "SMXDIA", "SMXFAA", "SMXHEA",
  "SMXCNA", "SMXACA", "SMXSLA", "SMXTLA", "SMXSYA", "SMXWHA", "SMXLYA",
  "SMXANA", "SMXIWA", "SMXSRA", "SMXBLA",
  # CMX*
  "CMXPRG", "CMXCVD", "CMXCMP", "CMXHT", "CMXDI", "CMXCKD", "CMXCLD",
  "CMXCPD", "CMXASM", "CMXCND", "CMXRHE", "CMXCCI", "CMXCBD", "CMXDE",
  "CMXPU", "CMXST", "CMXLY", "CMXAP",
  # RFX*
  "RFXSM", "RFXFSM", "RFXOB", "RFXTB", "RFXIMD", "RFXHIV", "RFXAIDS",
  "RFXUI", "RFXHC", "RFXONC", "RFXMN",
  # HMR*
  "HMRACI", "HMRARB", "HMRAHO", "HMRNS", "HMROS", "HMRCS", "HMRIS", "HMRAV",
  "HMRAB", "HMRCOV",
  # IMDX*
  "IMDXCT", "IMDXCTCR", "IMDXCTTE", "IMDXCTAB", "IMDXXR", "IMDXPN",
  # COX*
  "COXRD", "COXAR", "COXPM", "COXMOD", "COXPT", "COXEC", "COXSH", "COXIO",
  "COXPE", "COXST", "COXDIC", "COXRIO", "COXKF", "COXHF", "COXBC"
)

# Columns whose cells are additionally recoded by personalizedFun().
personalized <- c("DMRGENDR", "DSXOS", "CSXCTR", "SMXFEA", "CSXCOT")
# Tell whether a single cell holds a number.
#
# A decimal comma is accepted: the first "," is read as ".". Only the first
# element of `x` is examined.
#
# x: a scalar cell value (character or numeric); may be NA or empty.
# Returns TRUE when the value converts to a number, FALSE otherwise
# (including NA, "" and zero-length input).
is_number <- function(x) {
  if (length(x) == 0) {
    return(FALSE)
  }
  # Base sub() with fixed = TRUE replaces the first literal comma, so no string
  # package has to be attached for this check.
  candidate <- sub(",", ".", x[[1]], fixed = TRUE)
  # Failing to parse is the expected outcome for text cells, so the
  # "NAs introduced by coercion" warning is silenced for this one call.
  parsed <- suppressWarnings(as.numeric(candidate))
  !is.na(parsed)
}
# Normalise one cell of a No/Yes variable.
#
# The value is compared after trimming surrounding whitespace and upper-casing,
# so "no", " No " and "NO" are all the same answer (the previous version
# accepted "NO" but not "YES", and only one leading-space spelling).
#
# x: a scalar cell value (character or numeric); may be NA.
# Returns "No" for 0 / "NO", "Yes" for 1 / "YES" / "SI", and "" for NA,
# zero-length input, or anything else.
replaceNoYesValues <- function(x) {
  if (length(x) == 0 || is.na(x[[1]])) {
    return("")
  }
  answer <- trimws(toupper(x[[1]]))
  if (answer %in% c("0", "NO")) {
    "No"
  } else if (answer %in% c("1", "YES", "SI")) {
    "Yes"
  } else {
    ""
  }
}
# Clean one cell of a non-categorical column.
#
# x: a scalar cell value.
# Returns the value with its first "," replaced by "." when is_number()
# accepts it, and "" otherwise.
fixNonCategoric <- function(x) {
  if (is_number(x)) {
    return(str_replace(x, ",", "."))
  }
  ""
}
# Recode one cell of a column that needs a column-specific mapping.
#
# x:       a scalar cell value (character or numeric); may be NA.
# colname: name of the column the cell belongs to.
#
# For a handled column, NA becomes "" and a recognised raw value becomes its
# label. An unrecognised value is returned unchanged, except for DSXOS and
# CSXCOT where it becomes "". For any other column, `x` is returned untouched.
personalizedFun <- function(x, colname) {
  # Raw value (compared as text) -> label, one table per handled column.
  recodings <- list(
    DMRGENDR = c(
      "1" = "Female", "F" = "Female", "f" = "Female", "Female" = "Female",
      "0" = "Male", "M" = "Male", "m" = "Male", "Male" = "Male"
    ),
    CSXCTR = c(
      "1" = "Positive", "positive" = "Positive", "PositivM" = "Positive",
      "POSITIVM" = "Positive", "POS?T?VM" = "Positive",
      "0" = "Negative", "negative" = "Negative", "negativM" = "Negative",
      "NMGATIVM" = "Negative", "NAGATIVM" = "Negative", "NMGATIV" = "Negative",
      "negativeM" = "Negative", "NAGAT?VM" = "Negative", "NMGAT?V" = "Negative",
      # This garbled spelling of "negative" used to be listed under Positive,
      # which recorded a negative result as a positive one.
      "NMGAT?VM" = "Negative"
    ),
    SMXFEA = c("1" = "Yes", "0" = "No", "." = ""),
    DMRRETH1 = c(
      "1" = "Asian", "2" = "Black", "3" = "Hispanic", "4" = "White",
      "5" = "Multiracial", "6" = "Other"
    ),
    DMROCCU = c(
      "1" = "Unemployed", "2" = "Student", "3" = "Employed",
      "4" = "Self-employed", "5" = "Retired", "6" = ""
    ),
    DMRHREDU = c(
      "1" = "High School", "2" = "Bachelors", "3" = "Postgraduate",
      "4" = "Other"
    ),
    DSXOS = c(
      "0" = "Recovered", "Recovered" = "Recovered",
      "1" = "Deceased", "Deceased" = "Deceased",
      "2" = "Transferred", "Transferred" = "Transferred"
    ),
    CSXCOT = c("1" = "PCR", "2" = "antigen", "3" = "other")
  )
  # Columns where a value outside the table is blanked instead of kept.
  blank_when_unknown <- c("DSXOS", "CSXCOT")

  if (!(colname %in% names(recodings))) {
    return(x)
  }
  if (is.na(x)) {
    return("")
  }

  labels <- recodings[[colname]]
  key <- as.character(x)
  if (key %in% names(labels)) {
    return(labels[[key]])
  }
  if (colname %in% blank_when_unknown) "" else x
}
# Rewrite a dot-separated date as a slash-separated one.
#
# x: a scalar value.
# Returns `x` unchanged when it contains no "."; otherwise the value parsed as
# "%d.%m.%Y" and printed as "%d/%m/%Y".
dotToBar <- function(x) {
  has_dot <- grepl(".", x, fixed = TRUE)
  if (!has_dot) {
    return(x)
  }
  parsed <- as.Date(x, format = "%d.%m.%Y")
  format(parsed, "%d/%m/%Y")
}
# ---- Clean every cell of `hospital` and write the result ----
#
# `newDf` starts as a copy of `hospital`; every rule reads the untouched value
# from `hospital` and writes the cleaned value into `newDf`. (The previous
# rm(newDf) only produced a warning on a fresh workspace; the assignment
# below replaces any earlier value anyway.)
newDf <- hospital
col_names <- colnames(hospital)
n_cols <- ncol(hospital)

for (j in seq_len(n_cols)) {
  col_name <- col_names[j]

  # Progress report: percentage of columns reached, then the column name.
  print(paste0(trunc(j / n_cols * 100), "% completed"))
  print(col_name)

  # Which rules apply depends only on the column, so decide once per column.
  is_no_yes <- col_name %in% noYesValues
  is_numeric_like <- !is_no_yes &&
    !(col_name %in% categoric_vars) &&
    col_name != "DMRBORN" &&
    !grepl("DAT", col_name, fixed = TRUE)
  is_personalized <- col_name %in% personalized

  for (i in seq_len(nrow(hospital))) {
    cell <- hospital[i, j]

    if (is_no_yes) {
      newDf[i, j] <- replaceNoYesValues(cell)
    } else if (is_numeric_like) {
      newDf[i, j] <- fixNonCategoric(cell)
    }

    # Column-specific recoding runs last among the rules, so it overrides the
    # generic result above.
    if (is_personalized) {
      newDf[i, j] <- personalizedFun(cell, col_name)
    }

    # Missing values and lone "." placeholders are always written as "".
    if (is.na(cell)) {
      newDf[i, j] <- ""
    } else if (cell == ".") {
      newDf[i, j] <- ""
    }
  }
}

# Output folder. The earlier setwd() to ".../ressources/newRessources" was
# overridden immediately, so it has been dropped.
setwd("C:/Users/Victor/Documents/TFG/r-analytics-master/ressources/new_res_baskent/outpatient/clean")
write.csv(x = newDf, file = "konya_outpatient.csv", row.names = FALSE)
# ---- Load the raw hospital export and the harmonisation dictionary ----
#
# Produces three globals used by the rest of the script:
#   hospital        - the raw export, minus every "numeric" column
#   harmonised_data - all dictionary sheets stacked into one data frame
#   noYesValues     - names of the variables whose cells must be No/Yes

# Start from an empty global environment.
# NOTE(review): this also wipes anything else held by the interactive session.
rm(list = ls())

# Folder holding the raw hospital export. The earlier setwd() to the parent
# "ressources" folder was overridden immediately, so it has been dropped.
setwd("C:/Users/Victor/Documents/TFG/r-analytics-master/ressources/new_res_baskent/outpatient")

# Change this to the name of the resource file that should be cleaned.
hospital <- read.csv("konya_outpatient.csv", sep = ",")

# Drop every column whose name contains "numeric".
# NOTE(review): `%>%`, select() and contains() are not loaded anywhere in the
# visible script - presumably dplyr is attached beforehand; confirm.
hospital <- hospital %>%
  select(-contains("numeric"))

# Disabled alternative that removed the NOT.HARMONISED columns and the first
# data row; kept for reference.
# hospital["NOT.HARMONISED"] <- NULL
#
# names <- colnames(hospital)
# for (i in 1:length(names)){
#
#   if(grepl("NOT.HARMONISED", names[i])){
#     hospital[names[i]] <- NULL
#     print(paste("quito ", names[i]))
#   }
#
# }
# hospital <- hospital[-1,]

setwd("C:/Users/Victor/Documents/TFG/r-analytics-master/harmonised_data")

# One entry per dictionary sheet, in the order the sheets are stacked:
# file name, field separator, and the number of leading rows that are kept.
dictionary_files <- list(
  list(file = "Si&Sympt.csv", sep = ";", rows = 50),
  list(file = "Com&RF.csv", sep = ",", rows = 64),
  list(file = "Treatment.csv", sep = ";", rows = 32),
  list(file = "Dates.csv", sep = ";", rows = 12),
  list(file = "Demographics.csv", sep = ";", rows = 9),
  list(file = "Home_med.csv", sep = ";", rows = 13),
  list(file = "Imaging_data.csv", sep = ";", rows = 11),
  list(file = "Complications.csv", sep = ";", rows = 20),
  list(file = "Labo.csv", sep = ";", rows = 143),
  list(file = "Lifestyle&Diet.csv", sep = ";", rows = 165)
)

# Read the first five columns of each sheet and stack them in a single rbind()
# call. No per-sheet globals are created, so nothing has to be removed
# afterwards (the old clean-up list named "Complications" twice and never
# removed `Treatment`).
harmonised_data <- do.call(
  rbind,
  lapply(dictionary_files, function(spec) {
    read.csv(spec[["file"]], sep = spec[["sep"]])[seq_len(spec[["rows"]]), 1:5]
  })
)
rm(dictionary_files)

# Variables the dictionary declares as No/Yes (either spelling of "missing"),
# followed by the extra variable names that are added by hand.
noYesValues <- harmonised_data$Harmonised.variable.name[
  harmonised_data$Harmonised.data.format.unit %in%
    c("No/Yes / missing", "No/Yes / Missing")
]
noYesValues <- c(
  noYesValues,
  "CSXCOTAB", "CSXCOTAG", "IMDIT", "RFXHIV_RFXAIDS",
  "SMXASAH", "CMXATH", "CMXNO"
)
# Column names that are treated as categorical: the cleaning loop never sends
# these through the numeric clean-up. Each name appears once ("SMXASAH" used to
# be listed twice).
categoric_vars <- c(
  # hand-added variables
  "CMXATH", "CMXNO", "SMXASAH", "CSXCOTAB", "CSXCOTAG", "IMDIT",
  "RFXHIV_RFXAIDS",
  # DMR*
  "DMRGENDR", "DMRBORN", "DMRRETH1", "DMROCCU", "DMRHREDU",
  # DSX*
  "DSXOS", "DSXHO", "DSXIC",
  # TRX*
  "TRXAV", "TRXRIB", "TRXLR", "TRXRM", "TRXIA", "TRXIB", "TRXCH", "TRXAB",
  "TRXCS", "TRXHEP", "TRXAF", "TRXCP", "TRXOT", "TRXECM", "TRXIV", "TRXNIV",
  "TRXNO", "TRXOX", "TRXRR", "TRXTR", "TRXVA", "TRXPE", "TRXPV", "TRXIT",
  "TRXNMB", "TRXAC", "TRXINA", "TRXIS", "TRXIM", "TRXVC", "TRXVD", "TRXZN",
  # CSX*
  "CSXCOT", "CSXCTR",
  # SMX*
  "SMXFEA", "SMXCOA", "SMXSTA", "SMXSBA", "SMXRNA", "SMXMYA", "SMXARA",
  "SMXCPA", "SMXAPA", "SMXINA", "SMXNAA", "SMXDIA", "SMXFAA", "SMXHEA",
  "SMXCNA", "SMXACA", "SMXSLA", "SMXTLA", "SMXSYA", "SMXWHA", "SMXLYA",
  "SMXANA", "SMXIWA", "SMXSRA", "SMXBLA",
  # CMX*
  "CMXPRG", "CMXCVD", "CMXCMP", "CMXHT", "CMXDI", "CMXCKD", "CMXCLD",
  "CMXCPD", "CMXASM", "CMXCND", "CMXRHE", "CMXCCI", "CMXCBD", "CMXDE",
  "CMXPU", "CMXST", "CMXLY", "CMXAP",
  # RFX*
  "RFXSM", "RFXFSM", "RFXOB", "RFXTB", "RFXIMD", "RFXHIV", "RFXAIDS",
  "RFXUI", "RFXHC", "RFXONC", "RFXMN",
  # HMR*
  "HMRACI", "HMRARB", "HMRAHO", "HMRNS", "HMROS", "HMRCS", "HMRIS", "HMRAV",
  "HMRAB", "HMRCOV",
  # IMDX*
  "IMDXCT", "IMDXCTCR", "IMDXCTTE", "IMDXCTAB", "IMDXXR", "IMDXPN",
  # COX*
  "COXRD", "COXAR", "COXPM", "COXMOD", "COXPT", "COXEC", "COXSH", "COXIO",
  "COXPE", "COXST", "COXDIC", "COXRIO", "COXKF", "COXHF", "COXBC"
)

# Columns whose cells are additionally recoded by personalizedFun().
personalized <- c("DMRGENDR", "DSXOS", "CSXCTR", "SMXFEA", "CSXCOT")
# Tell whether a single cell holds a number.
#
# A decimal comma is accepted: the first "," is read as ".". Only the first
# element of `x` is examined.
#
# x: a scalar cell value (character or numeric); may be NA or empty.
# Returns TRUE when the value converts to a number, FALSE otherwise
# (including NA, "" and zero-length input).
is_number <- function(x) {
  if (length(x) == 0) {
    return(FALSE)
  }
  # Base sub() with fixed = TRUE replaces the first literal comma, so no string
  # package has to be attached for this check.
  candidate <- sub(",", ".", x[[1]], fixed = TRUE)
  # Failing to parse is the expected outcome for text cells, so the
  # "NAs introduced by coercion" warning is silenced for this one call.
  parsed <- suppressWarnings(as.numeric(candidate))
  !is.na(parsed)
}
# Normalise one cell of a No/Yes variable.
#
# x: a scalar cell value (character or numeric); may be NA.
# The value is upper-cased and trimmed before it is compared. Returns "No" for
# 0 / "NO", "Yes" for 1 / "YES" / "SI", and "" for NA or anything else.
replaceNoYesValues <- function(x) {
  if (is.na(x)) {
    return("")
  }
  answer <- toupper(x)
  answer <- trimws(answer)
  if (answer %in% c("0", "NO")) {
    return("No")
  }
  if (answer %in% c("1", "YES", "SI")) {
    return("Yes")
  }
  ""
}
# Clean one cell of a non-categorical column.
#
# x: a scalar cell value.
# Returns the value with its first "," replaced by "." when is_number()
# accepts it, and "" otherwise.
fixNonCategoric <- function(x) {
  if (is_number(x)) {
    return(str_replace(x, ",", "."))
  }
  ""
}
# Recode one cell of a column that needs a column-specific mapping.
#
# x:       a scalar cell value (character or numeric); may be NA.
# colname: name of the column the cell belongs to.
#
# SMXFEA is delegated to replaceNoYesValues(). For every other handled column,
# NA becomes "" and a recognised raw value becomes its label. An unrecognised
# value is returned unchanged, except for DSXOS and CSXCOT where it becomes "".
# For any other column, `x` is returned untouched.
personalizedFun <- function(x, colname) {
  if (colname == "SMXFEA") {
    return(replaceNoYesValues(x))
  }

  # Raw value (compared as text) -> label, one table per handled column.
  recodings <- list(
    DMRGENDR = c(
      "1" = "Female", "F" = "Female", "f" = "Female", "Female" = "Female",
      "0" = "Male", "M" = "Male", "m" = "Male", "Male" = "Male"
    ),
    CSXCTR = c(
      "1" = "Positive", "positive" = "Positive", "PositivM" = "Positive",
      "POSITIVM" = "Positive", "POS?T?VM" = "Positive",
      "0" = "Negative", "negative" = "Negative", "negativM" = "Negative",
      "NMGATIVM" = "Negative", "NAGATIVM" = "Negative", "NMGATIV" = "Negative",
      "negativeM" = "Negative", "NAGAT?VM" = "Negative", "NMGAT?V" = "Negative",
      # This garbled spelling of "negative" used to be listed under Positive,
      # which recorded a negative result as a positive one.
      "NMGAT?VM" = "Negative"
    ),
    DMRRETH1 = c(
      "1" = "Asian", "2" = "Black", "3" = "Hispanic", "4" = "White",
      "5" = "Multiracial", "6" = "Other"
    ),
    DMROCCU = c(
      "1" = "Unemployed", "2" = "Student", "3" = "Employed",
      "4" = "Self-employed", "5" = "Retired", "6" = ""
    ),
    DMRHREDU = c(
      "1" = "High School", "2" = "Bachelors", "3" = "Postgraduate",
      "4" = "Other"
    ),
    DSXOS = c(
      "0" = "Recovered", "Recovered" = "Recovered",
      "1" = "Deceased", "Deceased" = "Deceased",
      "2" = "Transferred", "Transferred" = "Transferred"
    ),
    CSXCOT = c("1" = "PCR", "2" = "antigen", "3" = "other")
  )
  # Columns where a value outside the table is blanked instead of kept.
  blank_when_unknown <- c("DSXOS", "CSXCOT")

  if (!(colname %in% names(recodings))) {
    return(x)
  }
  if (is.na(x)) {
    return("")
  }

  labels <- recodings[[colname]]
  key <- as.character(x)
  if (key %in% names(labels)) {
    return(labels[[key]])
  }
  if (colname %in% blank_when_unknown) "" else x
}
# Rewrite a dot-separated date as a slash-separated one.
#
# x: a scalar value.
# Returns `x` unchanged when it contains no "."; otherwise the value parsed as
# "%d.%m.%Y" and printed as "%d/%m/%Y".
dotToBar <- function(x) {
  has_dot <- grepl(".", x, fixed = TRUE)
  if (!has_dot) {
    return(x)
  }
  parsed <- as.Date(x, format = "%d.%m.%Y")
  format(parsed, "%d/%m/%Y")
}
# ---- Clean every cell of `hospital` and write the result ----
#
# `newDf` starts as a copy of `hospital`; every rule reads the untouched value
# from `hospital` and writes the cleaned value into `newDf`. (The previous
# rm(newDf) only produced a warning on a fresh workspace; the assignment
# below replaces any earlier value anyway.)
newDf <- hospital
col_names <- colnames(hospital)
n_cols <- ncol(hospital)

for (j in seq_len(n_cols)) {
  col_name <- col_names[j]

  # Progress report: percentage of columns reached, then the column name.
  print(paste0(trunc(j / n_cols * 100), "% completed"))
  print(col_name)

  # Which rules apply depends only on the column, so decide once per column.
  is_no_yes <- col_name %in% noYesValues
  is_numeric_like <- !is_no_yes &&
    !(col_name %in% categoric_vars) &&
    col_name != "DMRBORN" &&
    !grepl("DAT", col_name, fixed = TRUE)
  is_personalized <- col_name %in% personalized

  for (i in seq_len(nrow(hospital))) {
    cell <- hospital[i, j]

    if (is_no_yes) {
      newDf[i, j] <- replaceNoYesValues(cell)
    } else if (is_numeric_like) {
      newDf[i, j] <- fixNonCategoric(cell)
    }

    # Column-specific recoding runs last among the rules, so it overrides the
    # generic result above.
    if (is_personalized) {
      newDf[i, j] <- personalizedFun(cell, col_name)
    }

    # Missing values and lone "." placeholders are always written as "".
    if (is.na(cell)) {
      newDf[i, j] <- ""
    } else if (cell == ".") {
      newDf[i, j] <- ""
    }
  }
}

# Output folder. The earlier setwd() to ".../ressources/newRessources" was
# overridden immediately, so it has been dropped.
setwd("C:/Users/Victor/Documents/TFG/r-analytics-master/ressources/new_res_baskent/outpatient/clean")
write.csv(x = newDf, file = "konya_outpatient.csv", row.names = FALSE)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment