forked from DataKind-SF/datadive_201503_techsoup-global
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdive.R
60 lines (49 loc) · 2.65 KB
/
dive.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Load the sample extract and store the `org` and `org_type` columns as factors.
df_orig = read.csv("techsoup_datakind_sample.csv")
# Convert from df_orig itself: no data frame named `df` exists at this point,
# so `df$org` would resolve to the stats::df function and raise an error.
df_orig$org = factor(df_orig$org)
df_orig$org_type = factor(df_orig$org_type)
#-----------------------------------------------------------------------
#Revenue by OrgSubType
#-----------------------------------------------------------------------
# Keep only rows with a known, non-negative revenue. The mask is computed on
# df_orig so that it lines up row-for-row with the data frame it subsets.
filter = !is.na(df_orig$revenue) & (df_orig$revenue >= 0)
df = df_orig[filter, ]
# Rescale revenue to thousands (matches the y-axis label of the plot below).
df$revenue = df$revenue/1000
# Total revenue per org_subtype: a named 1-d array, one entry per subtype.
revenue_org_subtype = tapply(df$revenue, df$org_subtype, sum)
# Two-column table (subtype label, total revenue) built directly from the
# tapply result, keeping the totals numeric instead of passing through text.
revenue_org_stype = data.frame(orgsubtype = names(revenue_org_subtype),
                               revenue = as.numeric(revenue_org_subtype),
                               stringsAsFactors = FALSE)
# Largest total first.
revenue_org_stype = revenue_org_stype[order(-revenue_org_stype$revenue), ]
# Row indices of the (at most) ten largest totals; capped at the number of
# rows so that fewer than ten subtypes do not produce all-NA bars.
top.revenue = seq_len(min(10L, nrow(revenue_org_stype)))
barplot(revenue_org_stype$revenue[top.revenue],
        names.arg = revenue_org_stype$orgsubtype[top.revenue],
        xlab = "Org SubType", ylab = "Revenue (in thousand USD)", col = "red")
#------------------------------------------------------------------------
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------
#Revenue by OrgType
#-----------------------------------------------------------------------
# Keep only rows with a known, non-negative revenue. The mask is computed on
# df_orig (not on a previously filtered `df`) so that its length and row order
# match the data frame it subsets; a shorter mask would be silently recycled.
filter = !is.na(df_orig$revenue) & (df_orig$revenue >= 0)
df = df_orig[filter, ]
# Rescale revenue to thousands (matches the y-axis label of the plot below).
df$revenue = df$revenue/1000
# Total revenue per org_type: a named 1-d array, one entry per type.
revenue_org_type = tapply(df$revenue, df$org_type, sum)
# Replace the array with a two-column table (type label, total revenue),
# keeping the totals numeric instead of passing through text.
revenue_org_type = data.frame(orgtype = names(revenue_org_type),
                              revenue = as.numeric(revenue_org_type),
                              stringsAsFactors = FALSE)
# Largest total first.
revenue_org_type = revenue_org_type[order(-revenue_org_type$revenue), ]
# Row indices of the (at most) ten largest totals; capped at the number of
# rows so that fewer than ten types do not produce all-NA bars.
top.revenue = seq_len(min(10L, nrow(revenue_org_type)))
barplot(revenue_org_type$revenue[top.revenue],
        names.arg = revenue_org_type$orgtype[top.revenue],
        xlab = "Org Type", ylab = "Revenue (in thousand USD)", col = "red")
#------------------------------------------------------------------------
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------
#Budget Segmentation Analysis
#-----------------------------------------------------------------------
# Keep only rows with a known, non-negative budget. The mask is computed on
# df_orig (not on a previously filtered `df`) so that its length and row order
# match the data frame it subsets; a shorter mask would be silently recycled.
filter = !is.na(df_orig$budget) & (df_orig$budget >= 0)
df = df_orig[filter, ]
# Rescale budget to thousands.
df$budget = df$budget/1000
orgs = unique(df$org)
# One budget value per distinct org: match() returns the first row on which
# each org appears, so repeated rows for the same org are counted once.
budget = df$budget[match(orgs, df$org)]
hist(budget)
#-----------------------------------------------------------------------