# Commit d1a6b0a7, authored by Lucia Prieto
#
# Code and tables uploaded
#
# Parent commit: 9d531967
library(e1071)
library(foreach)
library(caret)
library(dplyr)
library(UBL)
library(DMwR)
# Load the evaluated tweets from the project data folder.
evaluations <- read.csv(file = "E:/mavis/data/tweets.csv")

# Quick look at how the labels are distributed in the columns of interest.
# `$` is kept on purpose: it allows partial name matching on data frames.
summary.factor(evaluations$IBM.Adap)
summary.factor(evaluations$GC.Adapt)
summary.factor(evaluations$MC.Adapt)
summary.factor(evaluations$X.3)
summary.factor(evaluations$X)

# Data Preparation ----

# Keep only rows that carry a tweet identifier.
evaluations <- filter(evaluations, !is.na(TWEET.ID))

# Treat "NONE" as "NEU" in MC.Orig, then encode the labels as the numeric
# codes of the resulting factor.
evaluations$MC.Orig <- as.numeric(as.factor(replace(
  as.character(evaluations$MC.Orig),
  which(evaluations$MC.Orig == "NONE"),
  "NEU"
)))

# Recode both target columns as two-level factors. The second level is
# relabelled with an underscore; values outside the two listed levels
# become NA.
evaluations$X <- factor(
  evaluations$X,
  levels = c("NEGATIVO", "NO NEGATIVO"),
  labels = c("NEGATIVO", "NO_NEGATIVO")
)
evaluations$X.3 <- factor(
  evaluations$X.3,
  levels = c("NEGATIVO", "NO NEGATIVO"),
  labels = c("NEGATIVO", "NO_NEGATIVO")
)

# Column positions of the two target columns (read by `modelate()`).
col_id_targets <- c(17, 21)
# select data set
#col_id_predictors <- c(8,10,12) # orig
#col_id_predictors <- c(9, 11, 13) # adapt
#col_id_predictors <- c(8, 9, 10, 11, 12, 13) # both
#df <- evaluations[,c(col_id_predictors, col_id_targets)]
#df <- df %>% select(-X.3) # 5 EV
#df <- df %>% select(-X) # 3 EV
#names(df)[ncol(df)] <- "target"
#final_results <- modelate(df, "3 EV", "ORIG", "")
#final2_results <- data.frame()
# Experiment grid ----
# Runs `modelate()` for every combination of evaluator set, predictor set and
# repetition index, accumulating one block of result rows per run.

conj <- 1:10 # repetition indices for the random under-sampling runs
evs <- c("3 EV", "5 EV")
preds <- c("ORIG", "ADAP", "BOTH")

# `modelate()` is defined further down in this script; fail with a clear
# message instead of "could not find function" when run top to bottom.
if (!exists("modelate", mode = "function")) {
  stop(
    "`modelate()` is not defined yet; run its definition before the ",
    "experiment grid.",
    call. = FALSE
  )
}

# Start from empty accumulators in a fresh session; keep appending to the
# existing ones when they are already present (e.g. restored from .RData).
if (!exists("final2_results")) {
  final2_results <- data.frame()
}
if (!exists("final_results")) {
  final_results <- data.frame()
}

# Results are appended after every run (rather than collected at the end) so
# that partial results survive if a later, long-running fit fails.
for (ev in evs) {
  for (p in preds) {
    for (run_id in conj) { # not named `c`, which would mask base::c
      method <- paste("DOWN-rndm", run_id)
      final2_results <- rbind(
        final2_results,
        modelate(evaluations, ev, p, method)
      )
    }
  }
}

# NOTE(review): this run is labelled " DOWN-clust", but in the `modelate()`
# definition in this file the k-means under-sampling code is commented out
# and random under-sampling is active -- confirm the label matches the
# balancing technique that is actually enabled when this line is run.
final_results <- rbind(
  final_results,
  modelate(evaluations, "5 EV", "ORIG", " DOWN-clust")
)

# Drop the rows produced by the random over-sampling ("UP-rndm") runs.
fn <- final_results[!grepl("UP-rndm", final_results$method), , drop = FALSE]
final_results <- fn
#' Train several classifiers on a class-balanced sample and summarise ROC AUC
#'
#' Selects predictor and target columns by position, keeps one of the two
#' target columns (`X` or `X.3`) as `target`, randomly under-samples every
#' class to the same size, splits the sample 50/50 and fits each algorithm on
#' the training half with 10-fold cross-validation repeated 3 times.
#'
#' @param df Data frame holding predictors and both target columns (`X`,
#'   `X.3`) at the expected column positions.
#' @param evaluadores_label Either `"5 EV"` (drops `X.3`, keeps `X`) or
#'   `"3 EV"` (drops `X`, keeps `X.3`). Copied into the output.
#' @param predictores_label `"ORIG"` (columns 8, 10, 12), `"ADAP"` (columns
#'   9, 11, 13) or `"BOTH"` (columns 8 to 13). Copied into the output.
#' @param extra_label Text appended to each algorithm name in the `method`
#'   output column.
#' @param n_per_class Number of rows sampled from each class.
#' @param target_cols Column positions of the target columns in `df`;
#'   defaults to the script-level `col_id_targets`.
#' @return A data frame with one row per algorithm and the columns
#'   `evaluadores`, `predictores`, `method`, `mean_acc` and `sd_acc`. Despite
#'   their names, the last two hold the mean and standard deviation of the
#'   per-repetition cross-validated ROC values, rounded to 2 decimals.
modelate <- function(df, evaluadores_label, predictores_label, extra_label,
                     n_per_class = 128, target_cols = col_id_targets) {
  # Predictor columns are picked by position; reject unknown labels explicitly
  # instead of failing later with "object 'col_id_predictors' not found".
  col_id_predictors <- switch(
    predictores_label,
    "ORIG" = c(8, 10, 12),
    "ADAP" = c(9, 11, 13),
    "BOTH" = c(8, 9, 10, 11, 12, 13),
    stop(
      "Unknown predictores_label: '", predictores_label,
      "' (expected 'ORIG', 'ADAP' or 'BOTH').",
      call. = FALSE
    )
  )
  print(predictores_label)

  df <- df[, c(col_id_predictors, target_cols)]

  # Keep exactly one target column. An unknown label used to fall through
  # silently, leaving the other target column in the data as a predictor.
  if (evaluadores_label == "5 EV") {
    print("5 EV")
    df <- df %>% select(-X.3)
  } else if (evaluadores_label == "3 EV") {
    print("3 EV")
    df <- df %>% select(-X)
  } else {
    stop(
      "Unknown evaluadores_label: '", evaluadores_label,
      "' (expected '5 EV' or '3 EV').",
      call. = FALSE
    )
  }
  names(df)[ncol(df)] <- "target"

  # data prep
  # set.seed(1)
  df <- df %>% filter(complete.cases(df))

  # Every class must be able to supply `n_per_class` rows.
  class_sizes <- table(df$target)
  if (any(class_sizes < n_per_class)) {
    stop(
      "Cannot sample ", n_per_class, " rows per class; class sizes are: ",
      paste(names(class_sizes), class_sizes, sep = "=", collapse = ", "),
      call. = FALSE
    )
  }

  # Balancing technique: random under-sampling of every class.
  # NOTE(review): the seed is itself drawn from the current RNG stream, so a
  # run is reproducible only if the caller seeds the RNG beforehand.
  set.seed(round(runif(1, 0, 2000)))
  # Original note (translated): 142 = number of negatives with 5 evaluators.
  ds <- df %>%
    group_by(target) %>%
    sample_n(n_per_class) %>%
    ungroup() # do not leak the grouping into the code below

  # Alternative balancing techniques tried previously (kept for reference).
  # -- random over-sampling of NEGATIVO:
  # over_training <- ds[ds$target=="NEGATIVO",]
  # ids <- runif((200/100) * nrow(over_training), 1, nrow(over_training))
  # over_training_plus <- over_training[ids, ]
  # ds <- over_training <- rbind(over_training, over_training_plus, ds[ds$target!="NEGATIVO", ])
  # -- k-means centres as under-sampled NO_NEGATIVO:
  # under_training <- ds[ds$target=="NO_NEGATIVO",]
  # under_training <- under_training %>% select(-target) %>% filter(complete.cases(under_training))
  # under_training <- kmeans(under_training, 441)
  # under_training <- as.data.frame(under_training$centers)
  # under_training$target <- "NO_NEGATIVO"
  # ds <- under_training <- rbind(under_training, ds[ds$target!="NO_NEGATIVO", ])
  # -- SMOTE:
  # over_training <- SMOTE(target ~ ., data = ds, perc.over = 200)
  # over_training <- over_training[over_training$target=="NEGATIVO",]
  # ds <- over_training <- rbind(over_training, ds[ds$target!="NEGATIVO", ])
  # -- ADASYN:
  # ds <- AdasynClassif(target~., ds, beta=1)

  # Stratified 50/50 split; only the training half is used below.
  inTraining <- createDataPartition(ds$target, p = 0.50, list = TRUE)
  training <- ds[inTraining$Resample1, ]

  # algorithm application
  fitControl <- trainControl(
    method = "repeatedcv",
    number = 10,
    repeats = 3,
    classProbs = TRUE,
    summaryFunction = twoClassSummary
  )
  methods <- c(
    "rf", "C5.0", "svmLinear", "bayesglm", "LogitBoost", "mlpWeightDecayML"
  )

  results <- foreach(algo = methods, .combine = "rbind") %do% {
    print(algo)
    model <- train(
      target ~ .,
      data = training,
      method = algo,
      trControl = fitControl,
      preProcess = c("center", "scale"),
      metric = "ROC",
      tuneLength = 3
    )

    # Average the fold-level ROC values within each repetition. Folds are
    # grouped by the repetition encoded in the resample name rather than by
    # row position, so the result does not depend on the row order of
    # `model$resample`.
    # Assumes caret's "FoldXX.RepY" naming for repeated CV -- TODO confirm.
    repetition_id <- sub("^.*\\.Rep", "", model$resample$Resample)
    repetition_means <- as.numeric(
      tapply(model$resample$ROC, repetition_id, mean)
    )

    data.frame(
      evaluadores = evaluadores_label,
      predictores = predictores_label,
      method = paste(algo, extra_label),
      mean_acc = round(mean(repetition_means), 2),
      sd_acc = round(sd(repetition_means), 2)
    )
  }
  print(results)
  results
}
# Persist results ----
write.csv2(
  final2_results,
  "E:/mavis/experiment_results_only_RNDM.csv",
  row.names = FALSE
)

# Checkpoint the whole workspace; the `load()` restores it in a later session.
save.image("E:/mavis/experiments.RData")
load("E:/mavis/experiments.RData")

# Summaries ----
# Patterns used to pick the rows to average (matched with grepl()).
method <- "rf DOWN"
ev <- "5"

# Mean of `mean_acc` over the rows of `results` whose `method`, `predictores`
# and `evaluadores` columns match the three given patterns.
mean_acc_for <- function(results, method_pattern, predictores_pattern,
                         evaluadores_pattern) {
  keep <- grepl(method_pattern, results$method) &
    grepl(predictores_pattern, results$predictores) &
    grepl(evaluadores_pattern, results$evaluadores)
  mean(results$mean_acc[keep])
}

mean_acc_for(final2_results, method, "ORIG", ev)
mean_acc_for(final2_results, method, "ADAP", ev)
mean_acc_for(final2_results, method, "BOTH", ev)
#' Attach packages, installing any that are not yet available
#'
#' @param libs Character vector of package names.
#' @return `NULL`, invisibly. Called for its side effects: missing packages
#'   are installed with `install.packages()` and every package is attached
#'   with `library()`.
install_and_load_dependencies <- function(libs) {
  for (lib in libs) {
    # system.file() returns "" when the package cannot be found. This avoids
    # installed.packages(), which scans every installed package and is slow.
    if (!nzchar(system.file(package = lib))) {
      install.packages(lib)
    }
    library(lib, character.only = TRUE)
  }
  invisible(NULL)
}
# Web-page text captured along with the script; not R code:
# Markdown is supported
# 0% or
# You are about to add 0 people to the discussion. Proceed with caution.
# Finish editing this message first!
# Please register or to comment