library(ggplot2)
##load dataset
res <- read.csv("~/Desktop/Toy house/survResponses.csv")
##change 0 to 2, change 1&2 to 0
for (i in 15:37){
res[,i][which(res[,i] == '2')] <- 'a'
res[,i][which(res[,i] == '1')] <- 'a'
res[,i][which(res[,i] == '0')] <- 'b'
res[,i][which(res[,i] == 'a')] <- '0'
res[,i][which(res[,i] == 'b')] <- '2'
}
##extract flavor data
flavor <- res[,15:37]
##delete all incomplete data
res = res[res$V10 == 1, ]
##delete the date that last more than 20 minutes or less than 1 minute
res$time = difftime(res$V9, res$V8, units = c('mins'))
res = res[which(res$time < 30 & res$time > 1), ]
##extract Greek data
Greek = res[res$Q2_1 >= 50, ]
measReach = function(data){
if(is.null(dim(data))){ #if data is a vector
ret = sum(data>0,na.rm=TRUE)/length(data)
} else if(ncol(data)==1){ #if data has only one column
ret = sum(data>0,na.rm=TRUE)/length(data)
}
else { #if data has multiple columns
ret = sum(apply(data>0,1,any),na.rm=TRUE)/nrow(data)
}
}
evalNext = function(nextSet,set,data,measure=measReach){
vals = numeric(length(nextSet)) #set up storage for return value
for(k in 1:length(nextSet)){#loop over the options in nextSet
if(length(set)==0){ #if no existing options
vals[k] = measure(data[,nextSet[k]])
} else { #if existing options
vals[k] = measure(data[,c(set,nextSet[k])])
}
}
vals
}
evalFull = function(fullSet,data,origSet=numeric(0),measure=measReach){
curSet = origSet; #the current set of included options
remSet = fullSet[!(fullSet%in%origSet)]; #the remaining set of options to consider
K = length(remSet)
optVals = numeric(K); #create storage for the optimal values (optVals)
ordSet = numeric(K); #create storage for ordered set
for(i in 1:K){ #loop over the remaining set consider
tmpVals = evalNext(remSet,curSet,data,measure); #calculate vector of next evaluations
k = which.max(tmpVals) #pick the option that gives max measure, note will pick first case if a tie!
optVals[i] = tmpVals[k] #add optimal value
ordSet[i] = remSet[k] #add index of option that creates optimal value
curSet = c(curSet,ordSet[i]); #add optimal next option to current set
remSet = remSet[-k]; #delete optimal next option from remaining set
}
#creaets a "TURF object" containing ordSet, optVals, origSet, origVal, measure, and pnames
turf = list(ordSet=ordSet,optVals=optVals,origSet=origSet,origVal=measure(data[,origSet]),measure=measure,pnames=colnames(data))
class(turf)="TURF" #makes the list into a TURF object so that can call plot.TURF
turf #return turf
}
plot.TURF=function(turf,...){
if(class(turf)!="TURF"){
cat("Object not a turf.")
} else {
df = with(turf,data.frame(vals = c(origVal,optVals),titles=paste(0:length(ordSet),c("Original",pnames[ordSet]),sep=":")))
#with(turf,barplot(c(origVal,optVals),names.arg=c("Original",pnames[ordSet])))
dodge = position_dodge(width=.75); ##to form constant dimensions positioning for all geom's
gp = ggplot(df,aes(y=vals,x=titles))
gp + geom_bar(position=dodge,stat="identity",col=1,fill=4,width=.75)
}
}
## delete all NA rows in flavor data
Greek2<-Greek[-which(apply(Greek[,15:37],1,function(x) all(is.na(x)))),]
##TURF Analysis
brandsPurch = Greek2[,15:37]
brandsPurch[is.na(brandsPurch)] = 0
names(brandsPurch)=c("Almond", "Banana", "Black Cherry", "Blueberry", "Caramel", "Chai", "Chocolate", "Cinnamon", "Coconut", "Honey", "Key Lime Pie", "Lemon", "Mango", "Mapel", "Peach", "Pineapple", "Plain", "Pomegranate", "Raspberry", "Strawberry", "StrawBan", "Vanilla", "VaniBan")
turf = evalFull(c(1:23),brandsPurch,c(4,20,15,22,17,10))
plot(turf)

## all yorgut #######
## delete all NA rows
allyogurt<-res[-which(apply(res[,15:37],1,function(x) all(is.na(x)))),]
##TURF Analysis
brandsPurch2 = allyogurt[,15:37]
brandsPurch2[is.na(brandsPurch2)] = 0
names(brandsPurch2)=c("Almond", "Banana", "Black Cherry", "Blueberry", "Caramel", "Chai", "Chocolate", "Cinnamon", "Coconut", "Honey", "Key Lime Pie", "Lemon", "Mango", "Mapel", "Peach", "Pineapple", "Plain", "Pomegranate", "Raspberry", "Strawberry", "StrawBan", "Vanilla", "VaniBan")
turf2 = evalFull(c(1:23),brandsPurch2,c(4,20,15,22,17,10))
plot(turf2)
