Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DO NOT MERGE: POC Draft PR for Ambry integration #249

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,20 @@
import com.linkedin.openhouse.cluster.storage.BaseStorageClient;
import com.linkedin.openhouse.cluster.storage.StorageType;
import com.linkedin.openhouse.cluster.storage.configs.StorageProperties;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;
import javax.annotation.PostConstruct;
import lombok.extern.slf4j.Slf4j;
import org.apache.iceberg.aws.AwsClientFactories;
import org.apache.iceberg.aws.AwsClientFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Lazy;
import org.springframework.stereotype.Component;
import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider;
import software.amazon.awssdk.http.urlconnection.UrlConnectionHttpClient;
import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.S3Configuration;

/**
* S3StorageClient is an implementation of the StorageClient interface for S3. It uses the {@link
Expand All @@ -39,11 +43,30 @@ public class S3StorageClient extends BaseStorageClient<S3Client> {
@PostConstruct
public synchronized void init() {
  log.info("Initializing storage client for type: " + S3_TYPE);

  validateProperties();
  // Defensive copy of the configured parameters for this storage type.
  Map<String, String> properties =
      new HashMap<>(storageProperties.getTypes().get(S3_TYPE.getValue()).getParameters());

  // NOTE(review): static credentials and an explicit endpoint override are POC-only wiring
  // for the Ambry S3 gateway. Production should return to AwsClientFactories.from(properties)
  // (the previous implementation) or instance-profile credentials.
  this.s3 =
      S3Client.builder()
          .region(Region.of(requireProperty(properties, "s3.region")))
          .endpointOverride(URI.create(requireProperty(properties, "s3.endpoint")))
          .httpClient(UrlConnectionHttpClient.builder().build())
          .credentialsProvider(
              StaticCredentialsProvider.create(
                  AwsBasicCredentials.create(
                      requireProperty(properties, "s3.access-key-id"),
                      requireProperty(properties, "s3.secret-access-key"))))
          .serviceConfiguration(
              S3Configuration.builder()
                  // Path-style access and disabled checksum validation match the
                  // s3.path-style-access / s3.checksum-enabled settings in cluster.yaml
                  // for the Ambry prototype endpoint — TODO confirm before productionizing.
                  .pathStyleAccessEnabled(true)
                  .checksumValidationEnabled(false)
                  .build())
          .build();
}

/**
 * Returns the value for {@code key} from the storage parameters, failing fast with a clear
 * message instead of a bare NullPointerException when the parameter is missing.
 */
private static String requireProperty(Map<String, String> properties, String key) {
  String value = properties.get(key);
  if (value == null) {
    throw new IllegalArgumentException("Missing required S3 storage parameter: " + key);
  }
  return value;
}

@Override
Expand Down
29 changes: 29 additions & 0 deletions infra/recipes/docker-compose/oh-s3-spark/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
To start the Spark shell for S3:
```shell
cd infra/recipes/docker-compose/oh-s3-spark
id-tool grestin sign
docker compose up -d
```
Log in to the container:
```shell
docker exec -it local.spark-master /bin/bash
```
and then launch the shell:
```shell
bin/spark-shell --packages org.apache.iceberg:iceberg-spark-runtime-3.1_2.12:1.2.0,software.amazon.awssdk:bundle:2.20.18,software.amazon.awssdk:url-connection-client:2.20.18 \
--jars openhouse-spark-runtime_2.12-*-all.jar \
--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,com.linkedin.openhouse.spark.extensions.OpenhouseSparkSessionExtensions \
--conf spark.sql.catalog.openhouse=org.apache.iceberg.spark.SparkCatalog \
--conf spark.sql.catalog.openhouse.catalog-impl=com.linkedin.openhouse.spark.OpenHouseCatalog \
--conf spark.sql.catalog.openhouse.io-impl=org.apache.iceberg.aws.s3.S3FileIO \
--conf spark.sql.catalog.openhouse.s3.endpoint=https://lor1-0002329.int.linkedin.com:3183/s3/openhouse-s3-ambry-integration-prototype \
--conf spark.sql.catalog.openhouse.s3.access-key-id=admin \
--conf spark.sql.catalog.openhouse.s3.secret-access-key=password \
--conf spark.sql.catalog.openhouse.s3.path-style-access=true \
--conf spark.sql.catalog.openhouse.s3.checksum-enabled=false \
--conf spark.sql.catalog.openhouse.metrics-reporter-impl=com.linkedin.openhouse.javaclient.OpenHouseMetricsReporter \
--conf spark.sql.catalog.openhouse.uri=http://openhouse-tables:8080 \
--conf spark.sql.catalog.openhouse.auth-token=$(cat /var/config/openhouse.token) \
--conf spark.sql.catalog.openhouse.cluster=LocalS3Cluster \
--conf spark.sql.catalogImplementation=in-memory
```
9 changes: 7 additions & 2 deletions infra/recipes/docker-compose/oh-s3-spark/cluster.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,18 @@ cluster:
default-type: "s3"
types:
s3:
rootpath: "openhouse-bucket"
rootpath: "container-a"
endpoint: "s3://"
parameters:
s3.endpoint: "http://minioS3:9000"
s3.region: "us-east-1"
s3.endpoint: "https://lor1-0002329.int.linkedin.com:3183/s3/openhouse-s3-ambry-integration-prototype"
s3.access-key-id: "admin"
s3.secret-access-key: "password"
s3.path-style-access: true
s3.disable-put-object-md5-validation: true
s3.disable-get-object-md5-validation: true
s3.checksum-algorithm: "null"
s3.checksum-enabled: false
iceberg:
write:
format:
Expand Down
6 changes: 6 additions & 0 deletions infra/recipes/docker-compose/oh-s3-spark/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ services:
service: openhouse-tables
volumes:
- ./:/var/config/
- /etc/riddler:/etc/riddler
depends_on:
- openhouse-housetables
- minioS3
Expand Down Expand Up @@ -79,6 +80,9 @@ services:
extends:
file: ../common/spark-services.yml
service: spark-master
volumes:
- ./:/var/config/
- /etc/riddler:/etc/riddler
ports:
- "5005:5005"
environment:
Expand All @@ -89,6 +93,8 @@ services:
extends:
file: ../common/spark-services.yml
service: spark-worker-a
volumes:
- ./:/var/config/
depends_on:
- spark-master

Expand Down
48 changes: 48 additions & 0 deletions infra/recipes/docker-compose/oh-s3-spark/identity.cert
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
-----BEGIN CERTIFICATE-----
MIIEMTCCAxmgAwIBAgIRAPr7bcCcyhHviCkMxHrrlyQwDQYJKoZIhvcNAQENBQAw
eTELMAkGA1UEBhMCVVMxEzARBgNVBAgTCkNhbGlmb3JuaWExFjAUBgNVBAcTDU1v
dW50YWluIFZpZXcxETAPBgNVBAoTCExpbmtlZEluMQ4wDAYDVQQLEwVUb29sczEa
MBgGA1UEAxMRTEkgRUkgSW50IENBIC0gRzEwHhcNMjQxMTA3MDU0MTQ3WhcNMjQx
MTA5MDU0MTQ3WjB/MQswCQYDVQQGEwJVUzETMBEGA1UECAwKQ2FsaWZvcm5pYTEW
MBQGA1UEBwwNTW91bnRhaW4gVmlldzERMA8GA1UECgwITGlua2VkSW4xDjAMBgNV
BAsMBVRvb2xzMSAwHgYDVQQDDBdsYWphaW4tbW4yLmxpbmtlZGluLmJpejCCASIw
DQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAJeiqLcwrnR0rTrKZy/oQQdQtXuF
Uj8HQ0BEDpFcBy/fHNbpjZW4+yNSnQQZfmiPU4NMxNJtAheDWIeFaaXYEI+aSxG7
xOM3yjj52R+B/9eGUJkrC9WhGVJfsFIzP0Tl1Hzxv9GisVVktYCdXVdecENvbS2Z
o6B7qssB3iW90Jj2rxqtk0B6EHLCxTz7O5kTzf3pcSwfPdvcxNperSrh9g5CXZtA
ZDHnH5C2SAcKBs3vShb3J2Ax5DPZ2o+n/Nzmvch6yVj6MG4XaR5NTFJfuB7sWYOk
dmzwvWn5V7cFwXz9Jq0RnqsnFoXMQTLrnkXiJ3LHMuKZHDHRKF4vpZQVS88CAwEA
AaOBrTCBqjAdBgNVHQ4EFgQUQEBdoQmNrpo65XU77eVO5ITt5HQwHwYDVR0jBBgw
FoAUYbxY7bYvXvJ2cQYd4/5X+ZN73bQwaAYDVR0RAQH/BF4wXIIXbGFqYWluLW1u
Mi5saW5rZWRpbi5iaXqGHHVybjpsaTp1c2VyUHJpbmNpcGFsKGxhamFpbimGI3Vy
bjpsaTp1c2VyUHJpbmNpcGFsTWV0YWRhdGEoMS4wLjApMA0GCSqGSIb3DQEBDQUA
A4IBAQBk+KHwayErkvpAgORpB9UAEBhR8XGBSp0oFrv+M7JYnM2JqbVEEx1Baune
qNoIgPNipWhkppNO/IjvBPUzK/6lWRBnF5XroKJN/FsFSuLVEold4DYdTLaSJnvS
oL5zwk7wWeI20BK08f5AmTrFAFlVGx/Wq14WI3m/dMcqKxRwQNg8udqfQ6h7SoeQ
oorV6n5N+3MX0ZifhBGD2FSMz/HdH3cx4G69qHA7yNG+y5aeZa9IZvXAz2KvLJdQ
dsorU1w/Q7cGilV0FU+XnEFri34Jl9utkkA5yPUaWrSa4tTq0Nd1/ixSZwP/V9HH
mGvA8YimjCzrdYjHyV51a2Zx+XJi
-----END CERTIFICATE-----
-----BEGIN CERTIFICATE-----
MIID4DCCAsigAwIBAgIQfU20MT9zYUKQhSYABqbQFDANBgkqhkiG9w0BAQsFADB6
MQswCQYDVQQGEwJVUzETMBEGA1UECBMKQ2FsaWZvcm5pYTEWMBQGA1UEBxMNTW91
bnRhaW4gVmlldzERMA8GA1UEChMITGlua2VkSW4xDjAMBgNVBAsTBVRvb2xzMRsw
GQYDVQQDExJMSSBFSSBSb290IENBIC0gRzEwHhcNMjEwNjMwMDAyMzM2WhcNMjkw
NjMwMDAyMzM2WjB5MQswCQYDVQQGEwJVUzETMBEGA1UECBMKQ2FsaWZvcm5pYTEW
MBQGA1UEBxMNTW91bnRhaW4gVmlldzERMA8GA1UEChMITGlua2VkSW4xDjAMBgNV
BAsTBVRvb2xzMRowGAYDVQQDExFMSSBFSSBJbnQgQ0EgLSBHMTCCASIwDQYJKoZI
hvcNAQEBBQADggEPADCCAQoCggEBALX7idzYw+hNj0y5L+TP29RcpdMXbhHKRnMn
tOijriJDvi8cgqDa6ueMEvzyMT+keYRtfAOqeSyDOR4vk3CWijxzlPSBItYghu2y
QT0oaUW7ME51sexQKgSRwUq5AuAKwK35o0kZGZNRaKtpE22QAFfcpd/fzqfQcYAj
RsLoFCHqk9VmT599PZwByWOE3waSJENMnA8tF8yEBscNHERXSocALo0oCf2PSYqU
vt4SxBIwnTU713RR0ZU0pkBjM/xbI441O45CPcq+Fm34C1XPWYDoxMLqoUMHTP8B
e5oQx+vwMpPBfd6nGl/nkljIVJehFIqPDsiFUGTRrnsQl2N0NfcCAwEAAaNjMGEw
DgYDVR0PAQH/BAQDAgGGMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFGG8WO22
L17ydnEGHeP+V/mTe920MB8GA1UdIwQYMBaAFNOxFjqsux8LqhZKIEn3IqmGZByv
MA0GCSqGSIb3DQEBCwUAA4IBAQAUXYmBgH226TI+VqzMt+XUd3e1vf+E/q7pdRw0
YsTi0hsJg1RzPYBtNixZ9CC5Ch3vX/U1mc7hgPuggb77KDDo6m+QnaRvAMIzO3kb
ljT5g/O5BZQ5u1+Myo3UU0p+2qQyOPTMrclzEeoJuJWPmlr9xnCoOYKp/iz5H/Dz
y4eRsvFyI0sAGjbmRHBxjlDbm0qBSfGhKAJzLW9U25ZCFudx0w7nwPL4JrL6a2eE
N7UCwXNvkRWCxbd4aZ3237Qvu6lXp30dUTZePIAPqhKPhmKSr8OWZDn/ykT6LkC9
KCWqyGKRcOQ2kdQVV78qOHPjfyOfUPv6NgHVLAi9jzXiISwH
-----END CERTIFICATE-----
28 changes: 28 additions & 0 deletions infra/recipes/docker-compose/oh-s3-spark/identity.key
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
-----BEGIN PRIVATE KEY-----
MIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQCXoqi3MK50dK06
ymcv6EEHULV7hVI/B0NARA6RXAcv3xzW6Y2VuPsjUp0EGX5oj1ODTMTSbQIXg1iH
hWml2BCPmksRu8TjN8o4+dkfgf/XhlCZKwvVoRlSX7BSMz9E5dR88b/RorFVZLWA
nV1XXnBDb20tmaOge6rLAd4lvdCY9q8arZNAehBywsU8+zuZE8396XEsHz3b3MTa
Xq0q4fYOQl2bQGQx5x+QtkgHCgbN70oW9ydgMeQz2dqPp/zc5r3IeslY+jBuF2ke
TUxSX7ge7FmDpHZs8L1p+Ve3BcF8/SatEZ6rJxaFzEEy655F4idyxzLimRwx0She
L6WUFUvPAgMBAAECggEAFHvHmL99wYOFZxyyZblVF3zilyYCXztG7WaEJUSuP9w+
1BVGqAQOAM6HIWekONGrsV/scpSBJNX8Xoo6JEoUqqFoJKQdy6fcrGer2eLAwceG
GbrD5X7WodcFRYVgtZjqZFpv1AzLMcLh9dbMJiI+vg1OTUuk2+f4wWWmMaj7hb4P
3FA62dBfeMNTY8rZ1JW+RAl5YwxUXPjYc1TyhWSuOuj1KTHxY4rMJ9zJ9mdu50a4
1Rz34sG/0sICNVPVt87fX0pK/O6eGKLG3H5bXzWvcFzq1EpooxTkc7ImnTrUx03Z
iMvCvQwNZbuXI3OSZC/IaL5cjooAQ1arvH0ymjv34QKBgQDFpg3EooPrNFYfSxqe
lNFlw2XwoB2jqWMwsvZnF50gu3zgGf5py2Sa0+6OksH2e9Wb0F0DvfV0X82wgBPa
bAm7AK/Zc6vI7SiXVGjV1JZ13gshcWjg+VsFCoNPSdEx7kBsXYxcs5FRomZyJiKw
K8LXCDNYnWw+HSWh2hOyGcrQ0QKBgQDEZwB4lqOb2z7tKdMIi6GMLa6iOEWEES+K
JMOvD+ig8CGHvK275bWQuAKw1NodBgf7Bo6oxj98OAvnr+u42OODDOnXT1r7dpz/
5y3FTVHt9a+XopQKz3eLa1QTYAGjXJPQs9L/1w9LGN14U9ATus7DhMhiuzKDEeVb
VrY4JHp6nwKBgDkOebUFET4J6r636KRU46oT4U64ejXvxTGeyCD0oSh810y+VEJo
M8+6VIiV9yZXk0+as+9LnBsPOQq1voV0YM66IQmOKc4kIG6nMGhiKVk3U1FBMvyX
H15yLXvrHFfkpnZh/w4XzGCbmCVluufOij2XIXN4ZnH76gwyLzNf3LuxAoGAfnZw
WDVvHdE7oIxYxVzbZZ6Ynv2qzVIfkU0A731H5jEY9gTovV843PmPSfuUgjG6+8kK
HOSiRkL0b3fYSYrmXCD5hVkklVyX1TMvRqb1xUp4iSP3PUq/h/dRCLS9+UmIKE9R
4sjAoTNej7cv+TRFJKIgQWqWKM1iKMEZzea3MrMCgYB9rWPWEZzpQKaX4JGsys0F
2L3k78bQzXWamCU841vFaUkUU3j1szuYYGwRe2ZDtPQ+aaFu94W2rB/8JU2shyWj
s+0uJgrUkahLcDhORlbmBQJhqjqKK6OahS90X9kZwBqJbLWAl5XZuR3neHDKaq7N
Hr+bbRyhIhW3pXc2v+5iCA==
-----END PRIVATE KEY-----
Binary file not shown.
16 changes: 16 additions & 0 deletions infra/recipes/docker-compose/oh-s3-spark/log4j-config.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">

<appender name="console" class="org.apache.log4j.ConsoleAppender">
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern"
value="%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n" />
</layout>
</appender>
<root>
<priority value="info"/>
<appender-ref ref="console"/>
</root>

</log4j:configuration>
1 change: 1 addition & 0 deletions infra/recipes/docker-compose/oh-s3-spark/openhouse.token
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
eyJhbGciOiJIUzI1NiJ9.eyJpYXQiOjE2NTk2ODI4MDAsImp0aSI6Im9wZW5ob3VzZTM4NmIzNmE2LWJkMjQtNDVmNS1iMGE0LTIxNDBjYTBiNDg0MyIsInN1YiI6IntcIkNPREVcIjpcIkRVTU1ZX0NPREVcIixcIlVTRVItSURcIjpcIm9wZW5ob3VzZVwifSJ9.52dp8mAm_ZzOvAU9gbe5HBdTJkFefwRT_H1ZBR5t3cI
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
eyJhbGciOiJIUzI1NiJ9.eyJpYXQiOjE2NTk2ODI4MDAsImp0aSI6InVfdGFibGVvd25lcmQ4NWMyOTkwLTA2ZDAtNDRiOS1hMTRlLTdiYzcxMmY0ODJiYSIsInN1YiI6IntcIkNPREVcIjpcIkRVTU1ZX0NPREVcIixcIlVTRVItSURcIjpcInVfdGFibGVvd25lclwifSJ9.c5G28SJoWDVgaOL4d-l2GPNBPMouRhxhoxxb4z4IbDo
2 changes: 1 addition & 1 deletion run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ date
# Using -XX:NativeMemoryTracking=summary for quick idea on java process memory breakdown, one could switch to
# "detail" for further details

java -Xmx256M -Xms64M -XX:NativeMemoryTracking=summary -jar "$@"
# NOTE(review): keystore/truststore passwords are hardcoded POC values and the keystore path
# assumes /var/config/identity.p12 is mounted; -Dcom.amazonaws.sdk.disableCertChecking=true
# disables TLS certificate verification and -Djavax.net.debug=ssl is extremely verbose —
# both must be removed before any non-debug use.
java -Xmx256M -Xms64M -XX:NativeMemoryTracking=summary -Djavax.net.ssl.keyStore=/var/config/identity.p12 -Djavax.net.ssl.keyStorePassword=work_around_jdk-6879539 -Djavax.net.ssl.trustStore=/etc/riddler/cacerts -Djavax.net.ssl.trustStorePassword=changeit -Djavax.net.ssl.keyStoreType=PKCS12 -Dcom.amazonaws.services.s3.disablePutObjectMD5Validation=true -Dcom.amazonaws.services.s3.disableGetObjectMD5Validation=true -Dcom.amazonaws.sdk.disableCertChecking=true -Dlog4j.configuration=file:/var/config/log4j-config.xml -Dlog4j1.compatibility=true -Djavax.net.debug=ssl -jar "$@"
6 changes: 5 additions & 1 deletion scripts/java/tools/dummytokens/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,8 @@ task generateTokens(type: Exec) {
commandLine "java", "-jar", jar.archiveFile.get(), "-d", project.getRootDir().getAbsolutePath() + "/infra/recipes/docker-compose/oh-hadoop-spark/"
}

tasks.named("build") { finalizedBy("generateTokens") }
tasks.named("build") { finalizedBy("generateTokens") }

tasks.named('jar') {
dependsOn ':cluster:configs:jar'
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ public void setupTest() {
s3StorageClient = context.getBean(S3StorageClient.class);
}

/*
@Test
public void testS3StorageClientInvalidProperties() {
when(storageProperties.getTypes())
Expand All @@ -54,7 +55,7 @@ public void testS3StorageClientNullOrEmptyProperties() {
}
});
assertThrows(IllegalArgumentException.class, () -> s3StorageClient.init());
}
}*/

@Test
public void testS3StorageClientValidProperties() {
Expand All @@ -73,6 +74,10 @@ private StorageProperties.StorageTypeProperties getStorageTypeProperties() {
storageTypeProperties.setEndpoint("http://S3:9000");
storageTypeProperties.setRootPath("/mybucket");
Map<String, String> parameters = new HashMap<>();
parameters.put("s3.region", "us-east-1");
parameters.put("s3.endpoint", "http://S3:9000");
parameters.put("s3.access-key-id", "admin");
parameters.put("s3.secret-access-key", "password");
System.setProperty("aws.region", "us-east-1");
storageTypeProperties.setParameters(parameters);
return storageTypeProperties;
Expand Down
2 changes: 1 addition & 1 deletion tables-service.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,4 @@ RUN mkdir -p /usr/java && ln -sfn /export/apps/jdk/JDK-1_8_0_172 /usr/java/defau
USER $USER

EXPOSE 8080
ENTRYPOINT ["sh", "-c", "./run.sh $APP_NAME.jar $@"]
ENTRYPOINT ["sh", "-c", "./run.sh $APP_NAME.jar --debug $@"]
Loading