-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcode and charts.Rmd
175 lines (146 loc) · 8.48 KB
/
code and charts.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
---
title: "The World of Starbucks Muggers"
author: "Amy Tzu-Yu Chen"
date: "August 14, 2016"
output: html_document
---
```{r load data, echo=TRUE, message=FALSE, warning=FALSE}
# Both source tables are read straight from the project's GitHub repository:
# `mug` holds the mug products, `user` holds the users (collectors).
mug_csv_url <- "https://raw.githubusercontent.com/amy17519/FredorangeMuggers/master/FredorangeMug.csv"
user_csv_url <- "https://raw.githubusercontent.com/amy17519/FredorangeMuggers/master/FredorangeUser.csv"

mug <- read.csv(file = mug_csv_url)
user <- read.csv(file = user_csv_url)
```
```{r geographic distribution of mug products and collectors, echo=TRUE, fig.height=7, fig.width=10, message=FALSE, warning=FALSE}
library(dplyr)
library(plotly)
library(countrycode)
# Number of mug products per country (one row per Country).
mug_country <- mug %>%
  group_by(Country) %>%
  summarise(Quantity = n()) %>%
  as.data.frame()

# Number of users (collectors) per country (one row per Country).
user_country <- user %>%
  group_by(Country) %>%
  summarise(People = n()) %>%
  as.data.frame()

# Take out users who did not indicate a country (1646 of them according to
# the original analysis). Rows are dropped with a logical mask rather than
# `-which(...)`: when nothing matches, `-which(...)` becomes `-integer(0)`
# and silently selects zero rows. `%in%` never yields NA, so rows whose
# Country is missing are kept, exactly as `-which(...)` kept them.
user_country <-
  user_country[!(user_country$Country %in% "undefined country"), ]

# Translate country names into ISO 3166-1 alpha-3 codes; the choropleth maps
# use CountryCode as their location key.
mug_country$CountryCode <-
  countrycode(mug_country$Country, "country.name", "iso3c")
user_country$CountryCode <-
  countrycode(user_country$Country, "country.name", "iso3c")

# Copies of both tables without the USA, used for the "excluding USA" maps.
# NOTE(review): the mug table is filtered on 'USA' and the user table on
# lower-case 'usa' -- presumably each matches the spelling in its own CSV;
# confirm against the data.
mug_country_noUSA <- mug_country[!(mug_country$Country %in% "USA"), ]
user_country_noUSA <- user_country[!(user_country$Country %in% "usa"), ]
# plotly maps

# Styling shared by every map: thin light-gray country borders and a
# frameless equirectangular projection without coastlines.
border_line <- list(color = toRGB("gray85"), width = 0.5)
geo_opts <- list(
  showframe = FALSE,
  showcoastlines = FALSE,
  projection = list(type = "equirectangular")
)

# Draw one world choropleth.
#   counts       - data frame with Country, CountryCode and a count column
#   value        - one-sided formula naming the count column, e.g. ~Quantity
#   palette      - name of the color palette used for the fill
#   legend_title - title shown on the color bar
#   map_title    - title of the figure
# Columns are mapped with formulas (~col): plotly >= 4.0 evaluates formulas
# against `counts`, whereas bare column names are looked up in the calling
# environment and fail with "object not found".
draw_country_map <- function(counts, value, palette, legend_title,
                             map_title) {
  plot_ly(
    counts,
    z = value, color = value, colors = palette,
    text = ~Country, locations = ~CountryCode,
    type = "choropleth",
    marker = list(line = border_line),
    colorbar = list(title = legend_title)
  ) %>%
    layout(title = map_title, geo = geo_opts)
}

draw_country_map(
  mug_country, ~Quantity, "Oranges", "# of Products",
  "Geographic Distribution of Mug Products"
)
draw_country_map(
  mug_country_noUSA, ~Quantity, "Oranges", "# of Products",
  "Geographic Distribution of Mug Products, excluding USA"
)
draw_country_map(
  user_country, ~People, "YlGnBu", "# of Collectors",
  "Geographic Distribution of Mug Collectors"
)
draw_country_map(
  user_country_noUSA, ~People, "GnBu", "# of Collectors",
  "Geographic Distribution of Mug Collectors, excluding USA"
)
```
```{r unsupervised kmeans clustering, echo=TRUE, fig.height=7, fig.width=10, message=FALSE, warning=FALSE}
# Choosing k for k-means clustering with an elbow plot of the total
# within-cluster sum of squares for k = 1..10. The final model further down
# is fitted with k = 5.
# NOTE(review): columns 7:9 are presumably the Owner/Seeker/Trader counts --
# confirm against the CSV header.
cluster_vars <- mug[, 7:9]
max_k <- 10

# Seed the RNG so the elbow plot is the same on every knit; kmeans() picks
# random starting centers. The final fit re-seeds before it runs, so this
# does not affect the cluster assignment.
set.seed(123)

# k = 1: the within-cluster sum of squares is the total sum of squares.
total_ss <- (nrow(cluster_vars) - 1) *
  sum(vapply(cluster_vars, var, numeric(1)))

# k = 2..max_k: sum of the per-cluster within sums of squares.
wss <- c(
  total_ss,
  vapply(
    2:max_k,
    function(k) sum(kmeans(cluster_vars, centers = k)$withinss),
    numeric(1)
  )
)

plot(seq_len(max_k), wss, type = "b", xlab = "Number of Clusters",
     ylab = "Within groups sum of squares")

# Disabled variant of the same elbow plot on columns 14:16:
#wss <- (nrow(mug[,14:16])-1)*sum(apply(mug[,14:16],2,var))
#for (i in 2:10) wss[i] <- sum(kmeans(mug[,14:16],
# centers=i)$withinss)
#plot(1:10, wss, type="b", xlab="Number of Clusters",
# ylab="Within groups sum of squares")
# Fit the final 5-cluster k-means model on columns 7:9 with a fixed seed and
# 100 random starts.
set.seed(123)
# Disabled experiment: rescale each count by its maximum before clustering.
#mug$test1<-mug$Seeker/max(mug$Seeker)
#mug$test2<-mug$Trader/max(mug$Trader)
#mug$test3<-mug$Owner/max(mug$Owner)
mugCluster <- kmeans(mug[, 7:9], centers = 5, nstart = 100)
#mugCluster <- kmeans(mug[, 14:16], 5, nstart = 5000)

# Observe the pattern in each cluster, rename the clusters by their
# characteristics, then use the result as a new variable: Difficulty.
mugCluster$centers

# Cluster numbers 2, 1, 5, 4, 3 are mapped, in that order, onto the labels
# below.
# NOTE(review): k-means cluster numbers are arbitrary; this order was
# presumably read off the centers printed above for this seed. Re-check it
# whenever the data, the seed or the R version changes.
difficulty_labels <- c(
  "Inconclusive", "Easy to Find Mugs", "Medium Difficulty",
  "Hard to Get Mugs", "Very Hard to Get Mugs"
)
mugCluster$cluster <- factor(
  mugCluster$cluster,
  levels = c(2, 1, 5, 4, 3),
  labels = difficulty_labels
)
mug$Difficulty <- mugCluster$cluster
# Scatter plots of the k-means result, one per pair of count variables,
# with points colored by the Difficulty label of their cluster.
# Columns are mapped with formulas (~col): plotly >= 4.0 evaluates formulas
# against `data`, whereas bare column names are looked up in the calling
# environment and fail with "object not found".

# One color per Difficulty level, in level order.
cluster_palette <- c(
  "olivedrab", "navyblue", "indianred2", "darkgoldenrod1", "magenta4"
)

# Hover text shared by the three plots; every field label ends in ": ".
mug_hover <- ~paste(
  Name, "<br>Edition: ", Edition, "<br>Country: ", Country,
  "<br>City: ", City, "<br>Owner: ", Owner, "<br>Seeker: ", Seeker,
  "<br>Trader: ", Trader, "<br>Difficulty: ", Difficulty
)

plot_ly(
  data = mug, x = ~Owner, y = ~Seeker,
  type = "scatter", mode = "markers",
  text = mug_hover, color = ~Difficulty, colors = cluster_palette
) %>%
  layout(title = "K-means Clustering: # of Seekers vs. # of Owners")

plot_ly(
  data = mug, x = ~Seeker, y = ~Trader,
  type = "scatter", mode = "markers",
  text = mug_hover, color = ~Difficulty, colors = cluster_palette
) %>%
  layout(title = "K-means Clustering: # of Trader vs. # of Seekers")

plot_ly(
  data = mug, x = ~Owner, y = ~Trader,
  type = "scatter", mode = "markers",
  text = mug_hover, color = ~Difficulty, colors = cluster_palette
) %>%
  layout(title = "K-means Clustering: # of Trader vs. # of Owner")
```
```{r popular editions, echo=TRUE, fig.height=7, fig.width=10, message=FALSE, warning=FALSE}
# View the editions with the most hard-to-find and collectible mugs.
#table(mug$Edition, mugCluster$cluster)

# Keep only the four editions under study and rebuild Edition as a factor so
# that it carries exactly those levels.
popular_editions <- mug[
  mug$Edition %in% c(
    "08 Icon Edition", "13 You Are Here Series",
    "Japan Country Series", "Relief Series"
  ),
]
popular_editions$Edition <- factor(as.character(popular_editions$Edition))

# One color per edition, in factor-level order.
edition_palette <- c("olivedrab3", "navyblue", "indianred2", "darkgoldenrod1")

# Hover text shared by both plots.
# Columns are mapped with formulas (~col): plotly >= 4.0 evaluates formulas
# against `data`, whereas bare column names are looked up in the calling
# environment and fail with "object not found".
edition_hover <- ~paste(
  Name, "<br>Edition: ", Edition, "<br>Country: ", Country,
  "<br>City: ", City, "<br>Owner: ", Owner, "<br>Seeker: ", Seeker,
  "<br>Trader: ", Trader, "<br>Difficulty: ", Difficulty
)

plot_ly(
  data = popular_editions, x = ~Popularity, y = ~Scarcity,
  type = "scatter", mode = "markers",
  text = edition_hover, color = ~Edition, colors = edition_palette
) %>%
  layout(
    title = "Collectible Editions: Scarcity vs. Popularity",
    xaxis = list(title = "Popularity: Seeker/max(Seeker)"),
    yaxis = list(title = "Scarcity: |Owner/max(Owner)-1|")
  )

plot_ly(
  data = popular_editions, x = ~Scarcity, y = ~WillingnessToTrade,
  type = "scatter", mode = "markers",
  text = edition_hover, color = ~Edition, colors = edition_palette
) %>%
  layout(title = "Collectible Editions: Willingness to Trade vs. Scarcity")
```
```{r popular countries, echo=TRUE, fig.height=7, fig.width=10, message=FALSE, warning=FALSE}
# Cross-tabulate countries against the difficulty clusters (printed in the
# report).
table(mug$Country, mugCluster$cluster)

# Keep only the six countries under study and rebuild Country as a factor so
# that it carries exactly those levels.
popular_country <- mug[
  mug$Country %in%
    c("Canada", "China", "Germany", "Mexico", "Russia", "USA"),
]
popular_country$Country <- factor(as.character(popular_country$Country))

# One color per country, in factor-level order.
country_palette <- c(
  "#F16745", "#404040", "#7BC8A4", "#4CC3D9", "#93648D", "#FFC65D"
)

# Hover text shared by both plots.
# Columns are mapped with formulas (~col): plotly >= 4.0 evaluates formulas
# against `data`, whereas bare column names are looked up in the calling
# environment and fail with "object not found".
country_hover <- ~paste(
  Name, "<br>Edition: ", Edition, "<br>Country: ", Country,
  "<br>City: ", City, "<br>Owner: ", Owner, "<br>Seeker: ", Seeker,
  "<br>Trader: ", Trader, "<br>Difficulty: ", Difficulty
)

plot_ly(
  data = popular_country, x = ~Popularity, y = ~Scarcity,
  type = "scatter", mode = "markers",
  text = country_hover, color = ~Country, colors = country_palette
) %>%
  layout(
    title = "Collectible Countries: Scarcity vs. Popularity",
    xaxis = list(title = "Popularity: Seeker/max(Seeker)"),
    yaxis = list(title = "Scarcity: |Owner/max(Owner)-1|")
  )

# Scarcity is on the y axis and willingness to trade on the x axis, so the
# title follows the "Y vs. X" wording used by the other charts.
plot_ly(
  data = popular_country, x = ~WillingnessToTrade, y = ~Scarcity,
  type = "scatter", mode = "markers",
  text = country_hover, color = ~Country, colors = country_palette
) %>%
  layout(
    title = "Collectible Countries: Scarcity vs. Willingness to Trade",
    xaxis = list(title = "Willingness To Trade"),
    yaxis = list(title = "Scarcity: |Owner/max(Owner)-1|")
  )
```