Skip to content

Commit 215fb96

Browse files
authored
Merge branch 'lakesoul-io:main' into main
2 parents: 8a2cb56 + f6b4127; commit: 215fb96

File tree

227 files changed

+16476
-2507
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

227 files changed

+16476
-2507
lines changed

.github/workflows/consistency-ci.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,7 @@ jobs:
7979
with:
8080
spark-version: '3.3.1'
8181
hadoop-version: '3'
82+
spark-url: 'https://mirrors.huaweicloud.com/apache/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz'
8283
- run: spark-submit --version
8384

8485
- uses: actions-rs/toolchain@v1

.github/workflows/deployment.yml

Lines changed: 13 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -88,9 +88,9 @@ jobs:
8888
steps:
8989
- name: Install automake
9090
run: brew install automake
91-
- uses: actions/checkout@v4
91+
- uses: actions/checkout@v5
9292
- name: Set up JDK 11
93-
uses: actions/setup-java@v4
93+
uses: actions/setup-java@v5
9494
with:
9595
java-version: '11'
9696
distribution: 'temurin'
@@ -152,23 +152,24 @@ jobs:
152152
name: lakesoul-nativemetadata-x86_64-pc-windows-msvc
153153
path: ./rust/target/release/
154154
- name: Set up JDK 11
155-
uses: actions/setup-java@v4
155+
uses: actions/setup-java@v5
156156
with:
157157
java-version: '11'
158158
distribution: 'temurin'
159159
cache: maven
160+
server-id: central
161+
server-username: MAVEN_USERNAME
162+
server-password: MAVEN_PASSWORD
163+
gpg-private-key: ${{ secrets.MAVEN_SIGN_GPG_KEY }}
164+
gpg-passphrase: GPG_PASSPHRASE
160165
- name: Install Protoc
161166
uses: arduino/setup-protoc@v2
162167
with:
163168
version: "23.x"
164169
repo-token: ${{ secrets.GITHUB_TOKEN }}
165170
- name: Release to Maven Central Repository
166-
uses: samuelmeuli/action-maven-publish@v1
167-
with:
168-
gpg_private_key: ${{ secrets.MAVEN_SIGN_GPG_KEY }}
169-
gpg_passphrase: ${{ secrets.MAVEN_SIGN_GPG_PASSPHRASE }}
170-
nexus_username: ${{ secrets.OSSRH_USERNAME }}
171-
nexus_password: ${{ secrets.OSSRH_TOKEN }}
172-
maven_goals_phases: "deploy"
173-
maven_args: "-DskipTests -Dmaven.test.skip=true"
174-
maven_profiles: "cross-build,release-sign-artifacts"
171+
run: mvn --batch-mode deploy -Pcross-build,release-sign-artifacts -DskipTests -Dmaven.test.skip=true
172+
env:
173+
MAVEN_USERNAME: ${{ secrets.OSSRH_USERNAME }}
174+
MAVEN_PASSWORD: ${{ secrets.OSSRH_TOKEN }}
175+
GPG_PASSPHRASE: ${{ secrets.MAVEN_SIGN_GPG_PASSPHRASE }}

.github/workflows/flink-cdc-hdfs-test.yml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -58,9 +58,9 @@ jobs:
5858
run: |
5959
python -m pip install --upgrade pip setuptools wheel
6060
pip install pymysql cryptography jproperties --no-cache-dir
61-
wget https://repo1.maven.org/maven2/org/apache/flink/flink-s3-fs-hadoop/1.20.1/flink-s3-fs-hadoop-1.20.1.jar -O $HOME/flink-s3-fs-hadoop-1.20.1.jar
62-
wget https://repo1.maven.org/maven2/org/apache/parquet/parquet-hadoop-bundle/1.13.1/parquet-hadoop-bundle-1.13.1.jar -O $HOME/parquet-hadoop-bundle-1.13.1.jar
63-
wget https://repo1.maven.org/maven2/org/apache/flink/flink-parquet/1.20.1/flink-parquet-1.20.1.jar -O $HOME/flink-parquet-1.20.1.jar
61+
wget -q https://repo1.maven.org/maven2/org/apache/flink/flink-s3-fs-hadoop/1.20.1/flink-s3-fs-hadoop-1.20.1.jar -O $HOME/flink-s3-fs-hadoop-1.20.1.jar
62+
wget -q https://repo1.maven.org/maven2/org/apache/parquet/parquet-hadoop-bundle/1.13.1/parquet-hadoop-bundle-1.13.1.jar -O $HOME/parquet-hadoop-bundle-1.13.1.jar
63+
wget -q https://repo1.maven.org/maven2/org/apache/flink/flink-parquet/1.20.1/flink-parquet-1.20.1.jar -O $HOME/flink-parquet-1.20.1.jar
6464
- name: Install Protoc
6565
uses: arduino/setup-protoc@v2
6666
with:

.github/workflows/flink-cdc-test.yml

Lines changed: 23 additions & 16 deletions
Large diffs are not rendered by default.

.github/workflows/maven-test.yml

Lines changed: 75 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,17 +12,17 @@ on:
1212
paths-ignore:
1313
- "javadoc/**"
1414
- "website/**"
15-
- "cpp/**"
1615
- "python/**"
16+
- "cpp/**"
1717
- "**.md"
1818
branches:
1919
- 'main'
2020
pull_request:
2121
paths-ignore:
2222
- "javadoc/**"
2323
- "website/**"
24-
- "cpp/**"
2524
- "python/**"
25+
- "cpp/**"
2626
- "**.md"
2727
branches:
2828
- 'main'
@@ -202,6 +202,79 @@ jobs:
202202
retention-days: 5
203203
if-no-files-found: error
204204

205+
spark-test-gluten:
206+
runs-on: ubuntu-latest
207+
needs: [ build-rust-linux-x86_64 ]
208+
209+
services:
210+
# Label used to access the service container
211+
postgres:
212+
# Docker Hub image
213+
image: postgres:14.5
214+
# Provide the password for postgres
215+
env:
216+
POSTGRES_PASSWORD: lakesoul_test
217+
POSTGRES_USER: lakesoul_test
218+
POSTGRES_DB: lakesoul_test
219+
# Set health checks to wait until postgres has started
220+
options: >-
221+
--health-cmd pg_isready
222+
--health-interval 10s
223+
--health-timeout 5s
224+
--health-retries 5
225+
--name lakesoul-test-pg
226+
ports:
227+
# Maps tcp port 5432 on service container to the host
228+
- 5432:5432
229+
230+
steps:
231+
- uses: actions/checkout@v4
232+
- name: Set up JDK 11
233+
uses: actions/setup-java@v4
234+
with:
235+
java-version: '11'
236+
distribution: 'temurin'
237+
cache: maven
238+
- name: Install psql
239+
run: sudo apt-get install -y postgresql-client-16
240+
- name: Init PG
241+
run: |
242+
./script/meta_init_for_local_test.sh -j 1
243+
- name: Install Protoc
244+
uses: arduino/setup-protoc@v2
245+
with:
246+
version: "23.x"
247+
repo-token: ${{ secrets.GITHUB_TOKEN }}
248+
- uses: actions/download-artifact@v4
249+
with:
250+
name: lakesoul-nativemetadata-x86_64-unknown-linux-gnu-maven-test
251+
path: ./rust/target/release/
252+
- uses: actions/download-artifact@v4
253+
with:
254+
name: lakesoul-nativeio-x86_64-unknown-linux-gnu-maven-test
255+
path: ./rust/target/release/
256+
- name: Build with Maven
257+
run: |
258+
sudo apt-get update && sudo apt-get install --only-upgrade tzdata
259+
wget -q https://dmetasoul-bucket.obs.cn-southwest-2.myhuaweicloud.com/releases/lakesoul/gluten-1.6.0-84ac41874-snapshot.tar.gz
260+
mkdir -p ~/.m2/repository
261+
tar xf gluten-1.6.0-84ac41874-snapshot.tar.gz -C ~/.m2/repository
262+
rm -f gluten-1.6.0-84ac41874-snapshot.tar.gz
263+
MAVEN_OPTS="-Xmx4g -Dio.netty.tryReflectionSetAccessible=true" mvn -B test -pl lakesoul-spark-gluten -am -Pgluten -Pcross-build --file pom.xml -Dtest='LakeSoulGlutenCompatSuite,UpdateGlutenTestSuite,UpsertGlutenTestSuite,DeleteSQLGlutenTestSuite,MergeIntoSQLGlutenTestSuite,' -Dsurefire.failIfNoSpecifiedTests=false
264+
- name: Generate Report Site
265+
if: always()
266+
run: |
267+
mvn surefire-report:report-only -pl lakesoul-spark-gluten -am -Pgluten
268+
- name: Upload Test Report
269+
if: always()
270+
continue-on-error: true
271+
uses: actions/upload-artifact@v4
272+
with:
273+
name: maven-test-report-artifact-spark-2
274+
path: lakesoul-spark-gluten/target/site
275+
retention-days: 5
276+
if-no-files-found: error
277+
205278
spark-test-rbac:
206279
runs-on: ubuntu-latest
207280
needs: [ build-rust-linux-x86_64 ]

.github/workflows/presto-cdc-test.yml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -58,11 +58,11 @@ jobs:
5858
run: |
5959
python -m pip install --upgrade pip setuptools wheel
6060
pip install pymysql cryptography jproperties --no-cache-dir
61-
wget https://dlcdn.apache.org/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz -O $HOME/hadoop-3.3.5.tar.gz && tar xf $HOME/hadoop-3.3.5.tar.gz -C $HOME
61+
wget -q https://dlcdn.apache.org/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz -O $HOME/hadoop-3.3.5.tar.gz && tar xf $HOME/hadoop-3.3.5.tar.gz -C $HOME
6262
echo "HADOOP_HOME=$HOME/hadoop-3.3.5" >> $GITHUB_ENV
63-
wget https://repo1.maven.org/maven2/org/apache/flink/flink-s3-fs-hadoop/1.20.1/flink-s3-fs-hadoop-1.20.1.jar -O $HOME/flink-s3-fs-hadoop-1.20.1.jar
64-
wget https://repo1.maven.org/maven2/org/apache/parquet/parquet-hadoop-bundle/1.12.3/parquet-hadoop-bundle-1.12.3.jar -O $HOME/parquet-hadoop-bundle-1.12.3.jar
65-
wget https://repo1.maven.org/maven2/org/apache/flink/flink-parquet/1.20.1/flink-parquet-1.20.1.jar -O $HOME/flink-parquet-1.20.1.jar
63+
wget -q https://repo1.maven.org/maven2/org/apache/flink/flink-s3-fs-hadoop/1.20.1/flink-s3-fs-hadoop-1.20.1.jar -O $HOME/flink-s3-fs-hadoop-1.20.1.jar
64+
wget -q https://repo1.maven.org/maven2/org/apache/parquet/parquet-hadoop-bundle/1.12.3/parquet-hadoop-bundle-1.12.3.jar -O $HOME/parquet-hadoop-bundle-1.12.3.jar
65+
wget -q https://repo1.maven.org/maven2/org/apache/flink/flink-parquet/1.20.1/flink-parquet-1.20.1.jar -O $HOME/flink-parquet-1.20.1.jar
6666
- name: Install Protoc
6767
uses: arduino/setup-protoc@v2
6868
with:

.github/workflows/python-ci.yml

Lines changed: 109 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,109 @@
1+
name: CI with Python Test
2+
on:
3+
push:
4+
paths-ignore:
5+
- "javadoc/**"
6+
- "website/**"
7+
- "**.md"
8+
branches:
9+
- 'main'
10+
- 'release/**'
11+
tags:
12+
- 'py-v[0-9]+.[0-9]+.[0-9]+'
13+
pull_request:
14+
paths-ignore:
15+
- "javadoc/**"
16+
- "website/**"
17+
- "**.md"
18+
branches:
19+
- 'main'
20+
- 'release/**'
21+
workflow_dispatch:
22+
23+
jobs:
24+
run-pytest:
25+
runs-on: ubuntu-latest
26+
services:
27+
# Label used to access the service container
28+
postgres:
29+
# Docker Hub image
30+
image: postgres:14.5
31+
# Provide the password for postgres
32+
env:
33+
POSTGRES_PASSWORD: lakesoul_test
34+
POSTGRES_USER: lakesoul_test
35+
POSTGRES_DB: lakesoul_test
36+
# Set health checks to wait until postgres has started
37+
options: >-
38+
--health-cmd pg_isready
39+
--health-interval 10s
40+
--health-timeout 5s
41+
--health-retries 5
42+
--name lakesoul-test-pg
43+
ports:
44+
# Maps tcp port 5432 on service container to the host
45+
- 5432:5432
46+
steps:
47+
- uses: actions/checkout@v4
48+
- name: Set up JDK 11
49+
uses: actions/setup-java@v4
50+
with:
51+
java-version: '11'
52+
distribution: 'temurin'
53+
cache: maven
54+
- name: Install psql
55+
run: sudo apt-get install -y postgresql-client-16
56+
- name: Init PG
57+
run: |
58+
./script/meta_init_for_local_test.sh -j 2
59+
- name: Install Protoc
60+
uses: arduino/setup-protoc@v2
61+
with:
62+
version: "23.x"
63+
repo-token: ${{ secrets.GITHUB_TOKEN }}
64+
- name: Install uv
65+
uses: astral-sh/setup-uv@v6
66+
with:
67+
enable-cache: true
68+
cache-suffix: "lakesoul-py-ci"
69+
cache-dependency-glob: |
70+
pyproject.toml
71+
uv.lock
72+
- uses: Swatinem/rust-cache@v2
73+
with:
74+
workspaces: "./rust -> target"
75+
- name: pytest
76+
env:
77+
LAKESOUL_SOURCE_DIR: ${{ github.workspace }}
78+
run: |
79+
export LD_LIBRARY_PATH=$JAVA_HOME/lib/server:$LD_LIBRARY_PATH
80+
echo $LD_LIBRARY_PATH
81+
mvn package -pl lakesoul-spark -am -DskipTests
82+
uv sync --no-install-project
83+
source .venv/bin/activate
84+
bash cpp/compile.sh
85+
cp cpp/build/python/lakesoul/metadata/generated/entity_pb2.py python/lakesoul/metadata/generated
86+
cp cpp/build/_lakesoul_dataset.so python/lakesoul/arrow
87+
cp rust/target/release/liblakesoul_metadata_c.so python/lakesoul/metadata/lib/
88+
cargo run --manifest-path rust/Cargo.toml --release -p lakesoul-console -- tpch-gen -p "file:///tmp/lakesoul/tpch_data" --scale-factor 0.1 -n 8
89+
uv run pytest python/tests/ -s
90+
publish:
91+
runs-on: ubuntu-latest
92+
if: startsWith(github.event.ref, 'refs/tags/py-v')
93+
needs: ["run-pytest"]
94+
env:
95+
PIP_INDEX_URL: https://pypi.tuna.tsinghua.edu.cn/simple
96+
permissions:
97+
id-token: write
98+
steps:
99+
- name: Checkout repository
100+
uses: actions/checkout@v3
101+
- name: build
102+
run: |
103+
python3 python/docker_build_all_wheels.py
104+
- name: Publish package distributions to PyPI
105+
uses: pypa/gh-action-pypi-publish@release/v1
106+
107+
108+
109+

.github/workflows/rust-ci.yml

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -78,5 +78,7 @@ jobs:
7878
rm -f Cargo.lock
7979
cargo clean
8080
RUST_BACKTRACE=full cargo test --package lakesoul-datafusion
81-
RUST_BACKTRACE=full cargo test --package lakesoul-flight --test flight_sql -- --color always
81+
RUST_BACKTRACE=full cargo test --package lakesoul-flight --test flight_sql -- --color always --nocapture
82+
RUST_BACKTRACE=full cargo test --color=always --package lakesoul-metadata --lib rbac::tests::test_verify_permission -- --nocapture
83+
RUST_BACKTRACE=full cargo test --color=always --package lakesoul-s3-proxy --bin lakesoul-s3-proxy tests --profile test --no-fail-fast -- --nocapture
8284

.github/workflows/website-publish.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ on:
77
push:
88
tags:
99
- 'v[0-9]+.[0-9]+.[0-9]+'
10+
- 'py-v[0-9]+.[0-9]+.[0-9]+'
1011
workflow_dispatch:
1112

1213
jobs:

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,13 +2,15 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
.pytest_cache
56
/target/
67
/build/
78
/.idea/
89
/spark-warehouse/
910
/derby.log
1011
/.metals/
1112
/.vscode/
13+
/**/.venv
1214
*.iml
1315
/native-io/lakesoul-io-java/target/
1416
/aws-test/target/
@@ -32,7 +34,9 @@ __pycache__/
3234
/python/lakesoul/metadata/generated/entity_pb2_grpc.py
3335
/python/build/
3436
/python/lakesoul.egg-info/
37+
/python/lakesoul/metadata/lib/liblakesoul_metadata_c.so
3538
*.whl
3639
/wheelhouse/
3740
/rust/.idea
41+
**/.venv/
3842
.flattened-pom.xml

0 commit comments

Comments
 (0)