rm(list = ls())
library("dummies")
## dummies-1.5.6 provided by Decision Patterns
library("AER")
## Loading required package: car
## Loading required package: carData
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: survival
library("plotly")
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library('RColorBrewer')
library("rgl")
library("data.table")
library("mlogit")
## Loading required package: Formula
library("gmnl")
## Loading required package: maxLik
## Loading required package: miscTools
## 
## Please cite the 'maxLik' package as:
## Henningsen, Arne and Toomet, Ott (2011). maxLik: A package for maximum likelihood estimation in R. Computational Statistics 26(3), 443-458. DOI 10.1007/s00180-010-0217-1.
## 
## If you have questions, suggestions, or comments regarding the 'maxLik' package, please use a forum or 'tracker' at maxLik's R-Forge site:
## https://r-forge.r-project.org/projects/maxlik/
pricedata <- read.csv("kiwi_bubbles_P2.csv")
##data clean
pricedata = pricedata[pricedata$price.KB != 99, ]
pricedata = pricedata[pricedata$price.KR != 99, ]
pricedata = pricedata[pricedata$price.MB != 99, ]
##run regression
mlogitdata <- mlogit.data(pricedata, id = "id", varying = 4:7, choice = "choice", shape = "wide")
mle = gmnl(choice~price, data = mlogitdata)
summary(mle)
## 
## Model estimated on: Wed Feb 12 22:21:42 2020 
## 
## Call:
## gmnl(formula = choice ~ price, data = mlogitdata, method = "nr")
## 
## Frequencies of categories:
## 
##       0      KB      KR      MB 
## 0.41564 0.18035 0.20039 0.20362 
## 
## The estimation took: 0h:0m:1s 
## 
## Coefficients:
##                Estimate Std. Error z-value  Pr(>|z|)    
## KB:(intercept)  4.25316    0.32821  12.959 < 2.2e-16 ***
## KR:(intercept)  4.36240    0.32945  13.241 < 2.2e-16 ***
## MB:(intercept)  4.20440    0.31331  13.419 < 2.2e-16 ***
## price          -3.73793    0.23671 -15.791 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Optimization of log-likelihood by Newton-Raphson maximisation
## Log Likelihood: -1909
## Number of observations: 1547
## Number of iterations: 4
## Exit of MLE: gradient close to zero
##unit cost
uc = 0.5

##calculate the optimal price for two products
############################Q 3 #####################################
demand=function(priceKB,priceKR,para){
    probKB=exp(para[1]+para[3]*priceKB)/(1+exp(para[1]+para[3]*priceKB)+exp(para[2]+para[3]*priceKR)+exp(para[4] +para[3]*priceMB))
    probKR=exp(para[2]+para[3]*priceKR)/(1+exp(para[1]+para[3]*priceKB)+exp(para[2]+para[3]*priceKR)+exp(para[4] +para[3]*priceMB))
    return(cbind(probKB,probKR))
}

profit=function(priceKB,priceKR,para){
    profitKB=demand(priceKB,priceKR,para)[,1]*(priceKB-uc)
    profitKR=demand(priceKB,priceKR,para)[,2]*(priceKR-uc)
    return(cbind(profitKB,profitKR))
}

#Set parameter
#The first element of "para" is beta0KB, the second element is beta0KR and the third beta1 and fourth element is beta0MB.
para=c(4.25316,4.36240,-3.73793,4.20440)

aux=seq(0,3,0.01)

pricespace=expand.grid(aux,aux)

profitmat=matrix(0L,nrow(pricespace),1)
priceMB = 1.43
for (i in 1:nrow(pricespace)){
    profitmat[i]=sum(profit(pricespace[i,1],pricespace[i,2],para))  
}

#Draw figure
xaxis=list(title="P^{KB}")
yaxis=list(autorange = "reversed",title="P^{KR}")
zaxis=list(title="Profit")
p=plot_ly(x=pricespace[,1],y=pricespace[,2],z=as.numeric(profitmat),
          type="scatter3d",mode="markers",
          marker = list(color = as.numeric(profitmat), colorscale = c('#FFE1A1', '#683531'), showscale = TRUE))%>%
    layout(scene=list(xaxis=xaxis,yaxis=yaxis,zaxis=zaxis))%>%
    config(mathjax = 'cdn')
p
x=pricespace[,1]
y=pricespace[,2]
z=as.numeric(profitmat)
##price of KB
x[which.max(z)]
## [1] 1.16
##price of KR
y[which.max(z)]
## [1] 1.16
###############Q 4############################

#################################with KB & KR###############################
set.seed(0)
# estimate multinomial logit segment-by-segment, where segmentation is defined by demographics
coef_noseg=mle$coefficients

demo = read.csv("demo_P2.csv")

#Clustering
demo_cluster = kmeans(x=demo[, 2:18], centers = 9, nstart = 1000)
# now combine cluster identity into the raw data
cluster_id = data.frame(id = demo$id)
cluster_id$cluster = demo_cluster$cluster
data = merge(pricedata, cluster_id, by = "id", all.x = T)
# for those who don't fit in any cluster, group them into one additional cluster
data$cluster[is.na(data$cluster)] = 10
## customer number
N = 359
# segment share
seg.share = c( table(demo_cluster$cluster),N - sum(table(demo_cluster$cluster))) / N
coef.est = data.frame(segment = 1:10, intercept.KB = NA, intercept.KR = NA, 
                      intercept.MB = NA, price.coef = NA) 
for (seg in 1:10) {
    # During each loop, pick subset of data of consumers from each segment.
    data.sub = subset(data, cluster == seg)
    
    #Using that data, the rest remains the same.
    mlogitdata=mlogit.data(data.sub,id="id",varying=4:7,choice="choice",shape="wide")
    
    #Run MLE.
    mle= gmnl(choice ~  price, data = mlogitdata)
    mle
    #Store the outcome in the coef.est matrix.
    coef.est[seg, 2:5] = mle$coefficients
}

#######KB&KR
plot(coef.est[1,2]-coef.est[1,3],coef.est[1,5],cex=20*seg.share[1],xlim=c(-3,3),ylim=c(-9,-1.5),
     col = "chocolate",pch=16,cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,
     xlab="beta_0^KB-beta_0^KR",ylab=("beta_1"))
points(coef.est[2,2]-coef.est[2,3],coef.est[2,5],cex=20*seg.share[2],col = "blue",pch=16)
points(coef.est[3,2]-coef.est[3,3],coef.est[3,5],cex=20*seg.share[3],col = "red",pch=16)
points(coef.est[4,2]-coef.est[4,3],coef.est[4,5],cex=20*seg.share[4],col = "chocolate",pch=16)
points(coef.est[5,2]-coef.est[5,3],coef.est[5,5],cex=20*seg.share[5],col = "light blue",pch=16)
points(coef.est[6,2]-coef.est[6,3],coef.est[6,5],cex=20*seg.share[2],col = "pink",pch=16)
points(coef.est[7,2]-coef.est[7,3],coef.est[7,5],cex=20*seg.share[3],col = "orange",pch=16)
points(coef.est[8,2]-coef.est[8,3],coef.est[8,5],cex=20*seg.share[4],col = "purple",pch=16)
points(coef.est[9,2]-coef.est[9,3],coef.est[9,5],cex=20*seg.share[5],col = "black",pch=16)
points(coef.est[10,2]-coef.est[10,3],coef.est[10,5],cex=20*seg.share[5],col = "green",pch=16)

###KB & MB
plot(coef.est[1,2]-coef.est[1,4],coef.est[1,5],cex=20*seg.share[1],xlim=c(-3,3),ylim=c(-9,-1.5),
     col = "chocolate",pch=16,cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,
     xlab="beta_0^KB-beta_0^MB",ylab=("beta_1"))
points(coef.est[2,2]-coef.est[2,4],coef.est[2,5],cex=20*seg.share[2],col = "blue",pch=16)
points(coef.est[3,2]-coef.est[3,4],coef.est[3,5],cex=20*seg.share[3],col = "red",pch=16)
points(coef.est[4,2]-coef.est[4,4],coef.est[4,5],cex=20*seg.share[4],col = "chocolate",pch=16)
points(coef.est[5,2]-coef.est[5,4],coef.est[5,5],cex=20*seg.share[5],col = "light blue",pch=16)
points(coef.est[6,2]-coef.est[6,4],coef.est[6,5],cex=20*seg.share[2],col = "pink",pch=16)
points(coef.est[7,2]-coef.est[7,4],coef.est[7,5],cex=20*seg.share[3],col = "orange",pch=16)
points(coef.est[8,2]-coef.est[8,4],coef.est[8,5],cex=20*seg.share[4],col = "purple",pch=16)
points(coef.est[9,2]-coef.est[9,4],coef.est[9,5],cex=20*seg.share[5],col = "black",pch=16)
points(coef.est[10,2]-coef.est[10,4],coef.est[10,5],cex=20*seg.share[5],col = "green",pch=16)

###KR & MB
plot(coef.est[1,3]-coef.est[1,4],coef.est[1,5],cex=20*seg.share[1],xlim=c(-3,3),ylim=c(-9,-1.5),
     col = "chocolate",pch=16,cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,
     xlab="beta_0^KR-beta_0^MB",ylab=("beta_1"))
points(coef.est[2,3]-coef.est[2,4],coef.est[2,5],cex=20*seg.share[2],col = "blue",pch=16)
points(coef.est[3,3]-coef.est[3,4],coef.est[3,5],cex=20*seg.share[3],col = "red",pch=16)
points(coef.est[4,3]-coef.est[4,4],coef.est[4,5],cex=20*seg.share[4],col = "chocolate",pch=16)
points(coef.est[5,3]-coef.est[5,4],coef.est[5,5],cex=20*seg.share[5],col = "light blue",pch=16)
points(coef.est[6,3]-coef.est[6,4],coef.est[6,5],cex=20*seg.share[2],col = "pink",pch=16)
points(coef.est[7,3]-coef.est[7,4],coef.est[7,5],cex=20*seg.share[3],col = "orange",pch=16)
points(coef.est[8,3]-coef.est[8,4],coef.est[8,5],cex=20*seg.share[4],col = "purple",pch=16)
points(coef.est[9,3]-coef.est[9,4],coef.est[9,5],cex=20*seg.share[5],col = "black",pch=16)
points(coef.est[10,3]-coef.est[10,4],coef.est[10,5],cex=20*seg.share[5],col = "green",pch=16)

##10 cluster is the best 
demandKB_seg=function(priceKB,priceKR,priceMB,para){
    prob=exp(para[1]+para[4]*priceKB)/(1+exp(para[1]+para[4]*priceKB)+exp(para[2]+para[4]*priceKR)+exp(para[3]+para[4]*priceMB))
    return(prob)
}


demandKR_seg=function(priceKB,priceKR,priceMB,para){
    prob=exp(para[2]+para[4]*priceKR)/(1+exp(para[1]+para[4]*priceKB)+exp(para[2]+para[4]*priceKR)+exp(para[3]+para[4]*priceMB))
    return(prob)
}


agg_choiceKB=function(priceKB,priceKR,priceMB) {
    agg_choice=seg.share[1]*demandKB_seg(priceKB,priceKR,priceMB,as.numeric(coef.est[1,2:5]))+
        seg.share[2]*demandKB_seg(priceKB,priceKR,priceMB,as.numeric(coef.est[2,2:5]))+
        seg.share[3]*demandKB_seg(priceKB,priceKR,priceMB,as.numeric(coef.est[3,2:5]))+
        seg.share[4]*demandKB_seg(priceKB,priceKR,priceMB,as.numeric(coef.est[4,2:5]))+
        seg.share[5]*demandKB_seg(priceKB,priceKR,priceMB,as.numeric(coef.est[5,2:5]))+
        seg.share[6]*demandKB_seg(priceKB,priceKR,priceMB,as.numeric(coef.est[6,2:5]))+
        seg.share[7]*demandKB_seg(priceKB,priceKR,priceMB,as.numeric(coef.est[7,2:5]))+
        seg.share[8]*demandKB_seg(priceKB,priceKR,priceMB,as.numeric(coef.est[8,2:5]))+
        seg.share[9]*demandKB_seg(priceKB,priceKR,priceMB,as.numeric(coef.est[9,2:5]))+
        seg.share[10]*demandKB_seg(priceKB,priceKR,priceMB,as.numeric(coef.est[10,2:5]))
    return(agg_choice)
}

agg_choiceKR=function(priceKB,priceKR,priceMB) {
    agg_choice=seg.share[1]*demandKR_seg(priceKB,priceKR,priceMB,as.numeric(coef.est[1,2:5]))+
        seg.share[2]*demandKR_seg(priceKB,priceKR,priceMB,as.numeric(coef.est[2,2:5]))+
        seg.share[3]*demandKR_seg(priceKB,priceKR,priceMB,as.numeric(coef.est[3,2:5]))+
        seg.share[4]*demandKR_seg(priceKB,priceKR,priceMB,as.numeric(coef.est[4,2:5]))+
        seg.share[5]*demandKR_seg(priceKB,priceKR,priceMB,as.numeric(coef.est[5,2:5]))+
        seg.share[6]*demandKR_seg(priceKB,priceKR,priceMB,as.numeric(coef.est[6,2:5]))+
        seg.share[7]*demandKR_seg(priceKB,priceKR,priceMB,as.numeric(coef.est[7,2:5]))+
        seg.share[8]*demandKR_seg(priceKB,priceKR,priceMB,as.numeric(coef.est[8,2:5]))+
        seg.share[9]*demandKR_seg(priceKB,priceKR,priceMB,as.numeric(coef.est[9,2:5]))+
        seg.share[10]*demandKR_seg(priceKB,priceKR,priceMB,as.numeric(coef.est[10,2:5]))
    return(agg_choice)
}

uc=0.5

pricespace <- seq(0.8,1.5,0.001)
goodaggpkb <- 0
goodaggpkr <- 0
aggprofit <- 0
for(pkb in pricespace){
    
        profit=1000*agg_choiceKB(pkb, pricespace, 1.43)*(pkb-0.5)+1000*agg_choiceKR(pkb,pricespace,1.43)*(pricespace-0.5) 
        maxprofit <- max(profit)
        if(maxprofit > aggprofit){
            goodaggpkb <- pkb
            aggprofit <- maxprofit
            goodaggpkr <- pricespace[which.max(profit)]
        }
    
}
########################KR only######################

demandKR_only=function(priceKR,priceMB,para){
    prob=exp(para[2]+para[4]*priceKR)/(1+exp(para[2]+para[4]*priceKR)+exp(para[3]+para[4]*priceMB))
    return(prob)
}

agg_choiceKR_only=function(priceKR,priceMB) {
    agg_choice=seg.share[1]*demandKR_only(priceKR,priceMB,as.numeric(coef.est[1,2:5]))+
        seg.share[2]*demandKR_only(priceKR,priceMB,as.numeric(coef.est[2,2:5]))+
        seg.share[3]*demandKR_only(priceKR,priceMB,as.numeric(coef.est[3,2:5]))+
        seg.share[4]*demandKR_only(priceKR,priceMB,as.numeric(coef.est[4,2:5]))+
        seg.share[5]*demandKR_only(priceKR,priceMB,as.numeric(coef.est[5,2:5]))+
        seg.share[6]*demandKR_only(priceKR,priceMB,as.numeric(coef.est[6,2:5]))+
        seg.share[7]*demandKR_only(priceKR,priceMB,as.numeric(coef.est[7,2:5]))+
        seg.share[8]*demandKR_only(priceKR,priceMB,as.numeric(coef.est[8,2:5]))+
        seg.share[9]*demandKR_only(priceKR,priceMB,as.numeric(coef.est[9,2:5]))+
        seg.share[10]*demandKR_only(priceKR,priceMB,as.numeric(coef.est[10,2:5]))
    return(agg_choice)
}


#profit
uc=0.5

goodaggpkr_only <- 0
aggprofit_only <- 0

for(pkr in seq(0.5,3,0.001)){
    profit=1000*agg_choiceKR_only (pkr, 1.43)*(pkr - uc)
    if(profit > aggprofit_only){
        aggprofit_only <- profit
        goodaggpkr_only <- pkr
        
    }
}

aggprofit_only
##        1 
## 290.5224
goodaggpkr_only
## [1] 1.065
demandMB1=function(priceKR,priceMB,para){
    prob=exp(para[3]+para[4]*priceMB)/(1+exp(para[2]+para[4]*priceKR) + exp(para[3]+para[4]*priceMB))
    return(prob)
}

agg_choiceMB1=function(priceKR,priceMB) {
    
    agg_choice=seg.share[1]*demandMB1(priceKR,priceMB,as.numeric(coef.est[1,2:5]))+
        seg.share[2]*demandMB1(priceKR,priceMB,as.numeric(coef.est[2,2:5]))+
        seg.share[3]*demandMB1(priceKR,priceMB,as.numeric(coef.est[3,2:5]))+
        seg.share[4]*demandMB1(priceKR,priceMB,as.numeric(coef.est[4,2:5]))+
        seg.share[5]*demandMB1(priceKR,priceMB,as.numeric(coef.est[5,2:5]))+
        seg.share[6]*demandMB1(priceKR,priceMB,as.numeric(coef.est[6,2:5]))+
        seg.share[7]*demandMB1(priceKR,priceMB,as.numeric(coef.est[7,2:5]))+
        seg.share[8]*demandMB1(priceKR,priceMB,as.numeric(coef.est[8,2:5]))+
        seg.share[9]*demandMB1(priceKR,priceMB,as.numeric(coef.est[9,2:5]))+
        seg.share[10]*demandMB1(priceKR,priceMB,as.numeric(coef.est[10,2:5]))
    
    return(agg_choice)
}

profitMB1=1000*agg_choiceMB1(1.06, 1.43)*(1.43-uc)
profitMB1
##        1 
## 106.5322
###############Q 5##########################
#######KB, KR & MB###################
demandMB=function(priceKB,priceKR,priceMB,para){
    prob=exp(para[3]+para[4]*priceMB)/(1+exp(para[1]+para[4]*priceKB)+exp(para[2]+para[4]*priceKR)+exp(para[3]+para[4]*priceMB))
    return(prob)
}

agg_choiceMB=function(priceKB,priceKR,priceMB) {
    agg_choice=seg.share[1]*demandMB(priceKB,priceKR,priceMB,as.numeric(coef.est[1,2:5]))+
        seg.share[2]*demandMB(priceKB,priceKR,priceMB,as.numeric(coef.est[2,2:5]))+
        seg.share[3]*demandMB(priceKB,priceKR,priceMB,as.numeric(coef.est[3,2:5]))+
        seg.share[4]*demandMB(priceKB,priceKR,priceMB,as.numeric(coef.est[4,2:5]))+
        seg.share[5]*demandMB(priceKB,priceKR,priceMB,as.numeric(coef.est[5,2:5]))+
        seg.share[6]*demandMB(priceKB,priceKR,priceMB,as.numeric(coef.est[6,2:5]))+
        seg.share[7]*demandMB(priceKB,priceKR,priceMB,as.numeric(coef.est[7,2:5]))+
        seg.share[8]*demandMB(priceKB,priceKR,priceMB,as.numeric(coef.est[8,2:5]))+
        seg.share[9]*demandMB(priceKB,priceKR,priceMB,as.numeric(coef.est[9,2:5]))+
        seg.share[10]*demandMB(priceKB,priceKR,priceMB,as.numeric(coef.est[10,2:5]))
        
    
    return(agg_choice)
}

profitMB1 = 1000*agg_choiceMB(1.155, 1.188, 1.43)*(1.43-0.5)


goodaggpmb1 <- 0
aggprofit1 <- 0
for(pmb1 in seq(0.8,1.43,0.001)){
        profit1=1000*agg_choiceMB(1.155, 1.188, pmb1)*(pmb1-0.5) 
        if(profit1 > aggprofit1){
            aggprofit1 <- profit1 ##180.2245
            goodaggpmb1 <- pmb1
            
    }
}

goodaggpkb2 <- 0
goodaggpkr2 <- 0
aggprofit2 <- 0
for(pkb2 in pricespace){
    
    profit2=1000*agg_choiceKB(pkb2, pricespace, 0.957)*(pkb2-0.5)+1000*agg_choiceKR(pkb2,pricespace,0.957)*(pricespace-0.5) 
    maxprofit2 <- max(profit2)
    if(maxprofit2 > aggprofit2){
        goodaggpkb2 <- pkb2##1.024
        aggprofit2 <- maxprofit2##276.9256
        goodaggpkr2 <- pricespace[which.max(profit2)]##1.08
        }
    
}

goodaggpmb3 <- 0
aggprofit3 <- 0
for(pmb3 in seq(0.5,1.8,0.001)){
    profit3=1000*agg_choiceMB(1.024, 1.08, pmb3)*(pmb3-0.5) 
    if(profit3 > aggprofit3){
        aggprofit3 <- profit3 ##147.46
        goodaggpmb3 <- pmb3##0.918
        
    }
}




goodaggpkb4 <- 0
goodaggpkr4 <- 0
aggprofit4 <- 0
for(pkb4 in pricespace){
    
    profit4=1000*agg_choiceKB(pkb4, pricespace, 0.918)*(pkb4-0.5)+1000*agg_choiceKR(pkb4,pricespace,0.918)*(pricespace-0.5) 
    maxprofit4 <- max(profit4)
    if(maxprofit4 > aggprofit4){
        goodaggpkb4 <- pkb2
        aggprofit4 <- maxprofit4
        goodaggpkr4 <- pricespace[which.max(profit4)]
    }
    
}




goodaggpmb5 <- 0
aggprofit5 <- 0
for(pmb5 in seq(0.5,1.8,0.001)){
    profit5=1000*agg_choiceMB(1.01, 1.069, pmb5)*(pmb5-0.5) 
    if(profit5 > aggprofit5){
        aggprofit5 <- profit5##144.40
        goodaggpmb5 <- pmb5##0.914
        
    }
}



goodaggpkb6 <- 0
goodaggpkr6 <- 0
aggprofit6<- 0
for(pkb6 in pricespace){
    
    profit6=1000*agg_choiceKB(pkb6, pricespace, 0.914)*(pkb6-0.5)+1000*agg_choiceKR(pkb6,pricespace,0.914)*(pricespace-0.5) 
    maxprofit6 <- max(profit6)
    if(maxprofit6 > aggprofit6){
        goodaggpkb6 <- pkb6
        aggprofit6 <- maxprofit6
        goodaggpkr6 <- pricespace[which.max(profit6)]
    }
    
}




goodaggpmb7 <- 0
aggprofit7 <- 0
for(pmb7 in seq(0.5,1.8,0.001)){
    profit7=1000*agg_choiceMB(1.009, 1.068, pmb7)*(pmb7-0.5) 
    if(profit7 > aggprofit7){
        aggprofit7 <- profit7##144.13
        goodaggpmb7 <- pmb7##0.914
        
    }
}





goodaggpkb8 <- 0
goodaggpkr8 <- 0
aggprofit8<- 0
for(pkb8 in pricespace){
    
    profit8 = 1000*agg_choiceKB(pkb8, pricespace, 0.914)*(pkb8-0.5)+1000*agg_choiceKR(pkb8,pricespace,0.914)*(pricespace-0.5) 
    maxprofit8 <- max(profit8)
    if(maxprofit8 > aggprofit8){
        goodaggpkb8 <- pkb8
        aggprofit8 <- maxprofit8
        goodaggpkr8 <- pricespace[which.max(profit8)]
    }
    
}







goodaggpmb9 <- 0
aggprofit9 <- 0
for(pmb9 in seq(0.5,1.8,0.001)){
    profit9=1000*agg_choiceMB(1.009, 1.068, pmb9)*(pmb9-0.5) 
    if(profit9 > aggprofit9){
        aggprofit9 <- profit9##144.13
        goodaggpmb9 <- pmb9##0.914
        
    }
}