Skip to content

Commit fdc22b7

Browse files
authored
feat: add kernel computation and update to v0.2.0 (#36)
* wip: kernel computation feature (#27) * fix: improve data parser * feat: init kernel computing feature powered by duckdb * fixes in kernel computation (#33) * fix: timezone diff issue * fix: userKernel -> kernelComputation * fix: global var & stopApp (#34) * docs: add kernel mode (#35)
1 parent f93a1e6 commit fdc22b7

File tree

13 files changed

+323
-50
lines changed

13 files changed

+323
-50
lines changed

DESCRIPTION

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: GWalkR
22
Title: Interactive Exploratory Data Analysis Tool
3-
Version: 0.1.5
3+
Version: 0.2.0
44
Authors@R: c(
55
person("Yue", "Yu", , "[email protected]", role = c("aut", "cre"),
66
comment = c(ORCID = "0000-0002-9302-0793")),
@@ -17,4 +17,7 @@ Imports:
1717
htmlwidgets,
1818
jsonlite,
1919
openssl,
20-
shiny
20+
shiny,
21+
shinycssloaders,
22+
DBI,
23+
duckdb

NAMESPACE

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
export(gwalkr)
44
export(gwalkrOutput)
55
export(renderGwalkr)
6+
import(DBI)
7+
import(duckdb)
68
import(htmlwidgets)
79
import(openssl)
810
import(shiny)
11+
import(shinycssloaders)
12+
importFrom(jsonlite,toJSON)

R/duckdb_utils.R

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
library(DBI)
2+
library(duckdb)
3+
4+
my_env <- new.env()
5+
6+
# Open an in-memory DuckDB connection, load `df` into the table
# `gwalkr_mid_table`, and remember the connection in `my_env$con`.
#
# df: object coercible with `as.data.frame()`; its rows become the table.
#
# Returns (invisibly) the result of `DBI::dbWriteTable()`.
# Any error raised while preparing the database propagates to the caller; in
# that case the half-initialised connection is closed and not stored.
duckdb_register_con <- function(df) {
  # Close a connection left over from a previous call so that it is not
  # leaked when the handle in `my_env` is replaced.
  old_con <- my_env$con
  if (!is.null(old_con)) {
    try(DBI::dbDisconnect(old_con, shutdown = TRUE), silent = TRUE)
    my_env$con <- NULL
  }

  con <- DBI::dbConnect(duckdb::duckdb(), ":memory:")
  registered <- FALSE
  on.exit(
    if (!registered) try(DBI::dbDisconnect(con, shutdown = TRUE), silent = TRUE),
    add = TRUE
  )

  # The ICU extension is installed and loaded before the session time zone
  # is pinned to UTC.
  DBI::dbExecute(con, "INSTALL icu")
  DBI::dbExecute(con, "LOAD icu")
  DBI::dbExecute(con, "SET GLOBAL TimeZone = 'UTC'")
  written <- DBI::dbWriteTable(
    con,
    "gwalkr_mid_table",
    as.data.frame(df),
    overwrite = FALSE
  )

  # Publish the connection only once it is fully usable.
  my_env$con <- con
  registered <- TRUE
  invisible(written)
}
13+
14+
# Close the DuckDB connection stored in `my_env$con`, if there is one.
#
# df: unused; kept (now with a default) so existing calls with or without an
#   argument keep working.
#
# Returns NULL invisibly. Safe to call when no connection is registered.
duckdb_unregister_con <- function(df = NULL) {
  con <- my_env$con
  if (is.null(con)) {
    return(invisible(NULL))
  }

  # Clear the stored handle even if disconnecting raises an error, so later
  # calls never see a dead connection.
  on.exit(my_env$con <- NULL, add = TRUE)
  # `shutdown = TRUE` also releases the in-memory database instance.
  DBI::dbDisconnect(con, shutdown = TRUE)
  invisible(NULL)
}
20+
21+
# Describe the columns of `gwalkr_mid_table` as a list of
# `list(key = <column name>, type = <meta type>)` entries (see
# `get_data_meta_type()`).
#
# Stops with "Database connection not found." when no connection is stored in
# `my_env$con`.
duckdb_get_field_meta <- function() {
  # `my_env$con` is NULL both when it was never set and after it was reset to
  # NULL on disconnect. `exists()` cannot tell these apart from a live handle
  # and would also search enclosing environments.
  con <- my_env$con
  if (is.null(con)) {
    stop("Database connection not found.")
  }

  # One row is enough: only the column classes are inspected. The metadata is
  # returned even when the table has no rows, instead of a silent NULL.
  sample_rows <- DBI::dbGetQuery(con, "SELECT * FROM gwalkr_mid_table LIMIT 1")
  get_data_meta_type(sample_rows)
}
31+
32+
# Run `sql` against the registered DuckDB connection and return the result
# as a data frame.
#
# sql: a SQL statement, passed unchanged to `DBI::dbGetQuery()`.
#
# An empty result set is returned as a zero-row data frame (not NULL), so
# callers can serialise it as an empty array.
# Stops with "Database connection not found." when no connection is stored in
# `my_env$con`.
#
# NOTE(review): the statement is executed verbatim with no validation; only
# trusted SQL should reach this function.
duckdb_get_data <- function(sql) {
  # `my_env$con` is NULL both when it was never set and after it was reset to
  # NULL on disconnect. `exists()` cannot tell these apart from a live handle
  # and would also search enclosing environments.
  con <- my_env$con
  if (is.null(con)) {
    stop("Database connection not found.")
  }

  DBI::dbGetQuery(con, sql)
}
42+
43+
# Classify every column of `data` for the front end.
#
# data: a data frame (or named list) whose columns are inspected.
#
# Returns an unnamed list with one `list(key = <name>, type = <type>)` entry
# per column, in column order, where `type` is:
#   - "datetime_tz": POSIXct column carrying a `tzone` attribute
#                    (any non-NULL value, including an empty string)
#   - "datetime":    POSIXct column without a `tzone` attribute
#   - "number":      column for which `is.numeric()` is TRUE
#   - "string":      everything else
# An input without columns yields an empty list.
get_data_meta_type <- function(data) {
  classify_column <- function(value) {
    if (inherits(value, "POSIXct")) {
      if (is.null(attr(value, "tzone"))) "datetime" else "datetime_tz"
    } else if (is.numeric(value)) {
      "number"
    } else {
      "string"
    }
  }

  # Build the result in one pass instead of growing a list with append().
  lapply(names(data), function(key) {
    list(key = key, type = classify_column(data[[key]]))
  })
}

R/gwalkr.R

Lines changed: 35 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,19 @@
44
#'
55
#' @import htmlwidgets
66
#' @import openssl
7+
#' @importFrom jsonlite toJSON
8+
#' @import shiny
9+
#' @import shinycssloaders
10+
#' @import DBI
11+
#' @import duckdb
712
#'
813
#' @param data A data frame to be visualized in the GWalkR. The data frame should not be empty.
914
#' @param lang A character string specifying the language for the widget. Possible values are "en" (default), "ja", "zh".
1015
#' @param dark A character string specifying the dark mode preference. Possible values are "light" (default), "dark", "media".
11-
#' @param columnSpecs An optional list of lists to manually specify the types of some columns in the data frame.
12-
#' Each top level element in the list corresponds to a column, and the list assigned to each column should have
13-
#' two elements: `analyticalType` and `semanticType`. `analyticalType` can
14-
#' only be one of "measure" or "dimension". `semanticType` can only be one of
16+
#' @param columnSpecs An optional list of lists to manually specify the types of some columns in the data frame.
17+
#' Each top level element in the list corresponds to a column, and the list assigned to each column should have
18+
#' two elements: `analyticalType` and `semanticType`. `analyticalType` can
19+
#' only be one of "measure" or "dimension". `semanticType` can only be one of
1520
#' "quantitative", "temporal", "nominal" or "ordinal". For example:
1621
#' \code{list(
1722
#' "gender" = list(analyticalType = "dimension", semanticType = "nominal"),
@@ -20,6 +25,7 @@
2025
#' @param visConfig An optional config string to reproduce your chart. You can copy the string by clicking "export config" button on the GWalkR interface.
2126
#' @param visConfigFile An optional config file path to reproduce your chart. You can download the file by clicking "export config" button then "download" button on the GWalkR interface.
2227
#' @param toolbarExclude An optional list of strings to exclude the tools from toolbar UI. However, Kanaries brand info is not allowed to be removed or changed unless you are granted with special permission.
28+
#' @param kernelComputation An optional boolean. When `TRUE`, computation runs in kernel mode (DuckDB inside a local Shiny app), which is much more efficient for large datasets. Default is `FALSE`.
2329
#'
2430
#' @return An \code{htmlwidget} object that can be rendered in R environments
2531
#'
@@ -28,42 +34,47 @@
2834
#' gwalkr(mtcars)
2935
#'
3036
#' @export
31-
gwalkr <- function(data, lang = "en", dark = "light", columnSpecs = list(), visConfig = NULL, visConfigFile = NULL, toolbarExclude = list()) {
37+
gwalkr <- function(data, lang = "en", dark = "light", columnSpecs = list(), visConfig = NULL, visConfigFile = NULL, toolbarExclude = list(), kernelComputation = FALSE) {
3238
if (!is.data.frame(data)) stop("data must be a data frame")
3339
if (!is.null(visConfig) && !is.null(visConfigFile)) stop("visConfig and visConfigFile are mutually exclusive")
3440
lang <- match.arg(lang, choices = c("en", "ja", "zh"))
3541

3642
rawFields <- raw_fields(data, columnSpecs)
3743
colnames(data) <- sapply(colnames(data), fname_encode)
38-
44+
3945
if (!is.null(visConfigFile)) {
4046
visConfig <- readLines(visConfigFile, warn=FALSE)
4147
}
42-
# forward options using x
43-
x = list(
44-
dataSource = jsonlite::toJSON(data),
45-
rawFields = rawFields,
46-
i18nLang = lang,
47-
visSpec = visConfig,
48-
dark = dark,
49-
toolbarExclude = toolbarExclude
50-
)
5148

52-
# create widget
53-
htmlwidgets::createWidget(
54-
name = 'gwalkr',
55-
x,
56-
package = 'GWalkR',
57-
width='100%',
58-
height='100%'
59-
)
49+
if (kernelComputation) {
50+
gwalkr_kernel(data, lang, dark, rawFields, visConfig, toolbarExclude)
51+
} else {
52+
x = list(
53+
dataSource = toJSON(data),
54+
rawFields = rawFields,
55+
i18nLang = lang,
56+
visSpec = visConfig,
57+
dark = dark,
58+
toolbarExclude = toolbarExclude,
59+
useKernel = FALSE
60+
)
61+
62+
# create widget
63+
htmlwidgets::createWidget(
64+
name = 'gwalkr',
65+
x,
66+
package = 'GWalkR',
67+
width='100%',
68+
height='100%'
69+
)
70+
}
6071
}
6172

6273
#' Shiny bindings for gwalkr
6374
#'
6475
#' Output and render functions for using gwalkr within Shiny
6576
#' applications and interactive Rmd documents.
66-
#'
77+
#'
6778
#' @import shiny
6879
#'
6980
#' @param outputId output variable to read from

R/gwalkr_kernel.R

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
utils::globalVariables(c(".rs.invokeShinyPaneViewer"))
2+
3+
# Replace every date-time column (class "POSIXt") of `df` with its numeric
# epoch value in milliseconds; all other columns are left untouched.
#
# df: a data frame.
#
# Returns the modified data frame.
convert_timestamps_in_df <- function(df) {
  for (column_name in colnames(df)) {
    column <- df[[column_name]]
    if (!inherits(column, "POSIXt")) {
      next
    }
    # Seconds since the epoch, scaled to milliseconds.
    epoch_seconds <- as.numeric(as.POSIXct(column, tz = "UTC"))
    df[[column_name]] <- epoch_seconds * 1000
  }
  df
}
11+
12+
# Build the Shiny app that serves GWalkR in kernel-computation mode.
#
# The data frame is loaded into DuckDB (`duckdb_register_con()`); the widget
# receives the column metadata and the URL of a data endpoint, and sends SQL
# to that endpoint instead of receiving the whole data set up front.
#
# data:           data frame to explore.
# lang, dark, rawFields, visConfig, toolbarExclude: forwarded unchanged to
#                 the widget as `i18nLang`, `dark`, `rawFields`, `visSpec`
#                 and `toolbarExclude`.
#
# Returns the Shiny app object in an interactive session, otherwise
# NULL (invisibly).
gwalkr_kernel <- function(data, lang, dark, rawFields, visConfig, toolbarExclude) {
  cat("GWalkR kernel mode initialized...\n")
  cat("Note: The console is unavailable while running a Shiny app. You can stop the app to use the console.\n")

  # Request handler behind the registered data endpoint: runs the SQL found
  # in the `sql` query-string parameter and answers with the rows as JSON.
  # NOTE(review): the SQL is executed as received, without validation.
  filter_func <- function(data, req) {
    query <- parseQueryString(req$QUERY_STRING)

    res <- duckdb_get_data(query$sql)
    # An empty result may arrive as NULL, which toJSON() would encode as
    # `{}`; the client expects an array of rows, so send `[]` instead.
    if (is.null(res)) {
      res <- data.frame()
    }
    res <- convert_timestamps_in_df(res)

    json <- toJSON(
      res,
      auto_unbox = TRUE
    )

    httpResponse(
      status = 200L,
      content_type = "application/json",
      content = json
    )
  }

  # When `.rs.invokeShinyPaneViewer` is available, use it to launch the app.
  app_options <- if (exists(".rs.invokeShinyPaneViewer")) {
    c(launch.browser = .rs.invokeShinyPaneViewer)
  } else {
    list()
  }

  app <- shinyApp(
    ui = fluidPage(
      shinycssloaders::withSpinner(
        gwalkrOutput("gwalkr_kernel"),
        proxy.height = "400px"
      )
    ),

    server = function(input, output, session) {
      path <- session$registerDataObj(
        "GWALKR",
        NULL,
        filter_func
      )

      # Drop a connection left behind by an earlier session before opening a
      # new one, so repeated sessions do not leak connections.
      duckdb_unregister_con()
      duckdb_register_con(data)
      fieldMetas <- duckdb_get_field_meta()

      x <- list(
        rawFields = rawFields,
        i18nLang = lang,
        visSpec = visConfig,
        dark = dark,
        toolbarExclude = toolbarExclude,
        useKernel = TRUE,
        fieldMetas = fieldMetas,
        endpointPath = path
      )

      output$gwalkr_kernel <- renderGwalkr({
        htmlwidgets::createWidget(
          name = "gwalkr",
          x,
          package = "GWalkR",
          width = "100%",
          height = "100%"
        )
      })

      # Closing the session releases the database and stops the app.
      session$onSessionEnded(function() {
        cat("GwalkR closed")
        duckdb_unregister_con()
        stopApp()
      })
    },

    options = app_options
  )

  if (interactive()) app
}

README.md

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,20 +24,30 @@ It can simplify your R data analysis and data visualization workflow, by turning
2424
2525
## Getting Started
2626

27-
### Setup GWalkR
27+
### 📦 Setup GWalkR
2828

2929
```R
3030
install.packages("GWalkR")
3131
library(GWalkR)
3232
```
3333

34-
### Start Your Data Exploration in a Single Line of Code
34+
### 📈 Start Your Data Exploration in a Single Line of Code
3535

3636
```R
3737
data(iris)
3838
gwalkr(iris)
3939
```
4040

41+
### 🚀 Switch to Kernel Computation for Large Datasets
42+
43+
```R
44+
gwalkr(large_df, kernelComputation = TRUE)
45+
```
46+
47+
Here is a [tutorial](https://medium.com/@bruceyu0416/eda-reimagined-in-r-gwalkr-duckdb-for-lightning-fast-visualizations-05b011e8ae39) with more details.
48+
49+
Please note that kernel mode runs inside a Shiny app, which blocks your R console while it is running. Stop the app to use the console again.
50+
4151
## Main Features
4252
### Get an overview of your data frame under 'Data' tab.
4353
<img width="700" alt="image" src="https://github.com/bruceyyu/GWalkR/assets/33870780/67131cfa-a25b-44ae-90a0-95902ea5edb1">

man/gwalkr.Rd

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

web_app/package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,16 @@
1111
},
1212
"dependencies": {
1313
"@kanaries/graphic-walker": "^0.4.70",
14+
"@kanaries/gw-dsl-parser": "^0.1.49",
1415
"@rollup/plugin-commonjs": "^25.0.2",
1516
"@rollup/plugin-replace": "^5.0.2",
1617
"@rollup/plugin-terser": "^0.4.3",
1718
"@rollup/plugin-typescript": "^11.1.2",
1819
"mobx-react-lite": "^3.4.3",
1920
"react": "^18.2.0",
2021
"react-dom": "^18.2.0",
21-
"styled-components": "^5.3.6"
22+
"styled-components": "^5.3.6",
23+
"vite-plugin-wasm": "^3.3.0"
2224
},
2325
"devDependencies": {
2426
"@types/react": "^18.2.14",

web_app/src/dataSource/index.tsx

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import type { IDataQueryPayload, IRow } from "@kanaries/graphic-walker/interfaces";
2+
import { parser_dsl_with_meta } from "@kanaries/gw-dsl-parser";
3+
4+
const DEFAULT_LIMIT = 50_000;
5+
6+
/**
 * Send a SQL statement to the R data endpoint and return the parsed JSON body.
 *
 * The statement is appended to `endpointPath` as an extra `sql` query
 * parameter, so `endpointPath` must already contain a query string.
 *
 * @param sql SQL text to execute on the R side.
 * @param endpointPath URL of the data endpoint.
 * @returns The decoded JSON response.
 * @throws When the request fails, the server answers with a non-2xx status,
 *         or the body is not valid JSON. The error is logged and re-thrown.
 */
const sendHTTPData = async (sql: string, endpointPath: string): Promise<unknown> => {
    try {
        const response = await fetch(`${endpointPath}&sql=${encodeURIComponent(sql)}`);
        // fetch() only rejects on network failure; surface HTTP errors explicitly
        // instead of trying to decode an error page as data.
        if (!response.ok) {
            throw new Error(`Kernel query failed: HTTP ${response.status} ${response.statusText}`);
        }
        return await response.json();
    } catch (error) {
        console.error("Error:", error);
        throw error;
    }
};
20+
21+
/**
 * Create the query function used in kernel mode.
 *
 * The returned function translates a Graphic Walker query payload into SQL
 * for the table `gwalkr_mid_table` (applying `DEFAULT_LIMIT` when the payload
 * has no limit), sends it to the R endpoint and returns the rows.
 *
 * @param fieldMetas Column metadata (`key` and `type`) of the table.
 * @param endpointPath URL of the R data endpoint.
 * @returns An async function mapping a query payload to the resulting rows;
 *          a null or undefined response becomes an empty array.
 */
export function getDataFromKernelBySql(fieldMetas: { key: string; type: string }[], endpointPath: string) {
    return async (payload: IDataQueryPayload) => {
        const limitedPayload = { ...payload, limit: payload.limit ?? DEFAULT_LIMIT };
        const tableMeta = { gwalkr_mid_table: fieldMetas };

        const sql = parser_dsl_with_meta(
            "gwalkr_mid_table",
            JSON.stringify(limitedPayload),
            JSON.stringify(tableMeta)
        );

        const rows = await sendHTTPData(sql, endpointPath);
        return (rows ?? []) as IRow[];
    };
}

0 commit comments

Comments
 (0)