Skip to content

Commit 06eb6e7

Browse files
committed
[KYUUBI-SHADED #28] Step 1/2: Overwrite SnapStream to remove deps of snappy in ZK client 3.6
### _Why are the changes needed?_ This PR aims to tackle #10 with another approach. Though we could add the snappy deps into ZK client 3.6 to address it, snappy ships native libs that cannot be relocated, so there would be potential conflict issues; that approach also increases the size of `kyuubi-relocated-zookeeper-3.6`. Actually, `org.apache.zookeeper.server.persistence.SnapStream` is only used on the server side. In terms of Kyuubi's usage, the embedded ZK feature is mostly for testing purposes, so overwriting `SnapStream` to drop the Snappy support should be OK. To keep a clear change history, I propose to split the change into 2 PRs: 1. copy `org.apache.zookeeper.server.persistence.SnapStream` from [Apache Zookeeper v3.6.4](https://raw.githubusercontent.com/apache/zookeeper/release-3.6.4/zookeeper-server/src/main/java/org/apache/zookeeper/server/persistence/SnapStream.java) as-is; 2. remove the Snappy support (simply throw `UnsupportedOperationException`) in `org.apache.zookeeper.server.persistence.SnapStream` and remove the snappy deps. ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [ ] [Run test](https://kyuubi.readthedocs.io/en/master/develop_tools/testing.html#running-tests) locally before making a pull request Closes #28 from pan3793/snappy-1. 2aa4995 [Cheng Pan] style 164f5f5 [Cheng Pan] Step 1/2: Overwrite SnapStream to remove deps of snappy in ZK client 3.6 Authored-by: Cheng Pan <chengpan@apache.org> Signed-off-by: Cheng Pan <chengpan@apache.org>
1 parent 6b0166f commit 06eb6e7

File tree

2 files changed

+316
-0
lines changed
  • kyuubi-relocated-zookeeper-parent/kyuubi-relocated-zookeeper-36

2 files changed

+316
-0
lines changed

kyuubi-relocated-zookeeper-parent/kyuubi-relocated-zookeeper-36/pom.xml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ under the License.
3232
<description>Relocated Zookeeper 3.6 classes used by Kyuubi internally.</description>
3333

3434
<properties>
35+
<slf4j.version>1.7.36</slf4j.version>
3536
<zookeeper.version>3.6.4</zookeeper.version>
3637
<curator.version>5.4.0</curator.version>
3738
<netty.version>4.1.91.Final</netty.version>
@@ -56,6 +57,13 @@ under the License.
5657
</dependencyManagement>
5758

5859
<dependencies>
60+
<dependency>
61+
<!-- kyuubi distribution provides logging classes -->
62+
<groupId>org.slf4j</groupId>
63+
<artifactId>slf4j-api</artifactId>
64+
<version>${slf4j.version}</version>
65+
<scope>provided</scope>
66+
</dependency>
5967
<dependency>
6068
<groupId>org.apache.zookeeper</groupId>
6169
<artifactId>zookeeper</artifactId>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,308 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.zookeeper.server.persistence;
20+
21+
import java.io.BufferedInputStream;
22+
import java.io.BufferedOutputStream;
23+
import java.io.File;
24+
import java.io.FileInputStream;
25+
import java.io.FileNotFoundException;
26+
import java.io.FileOutputStream;
27+
import java.io.IOException;
28+
import java.io.InputStream;
29+
import java.io.OutputStream;
30+
import java.io.RandomAccessFile;
31+
import java.nio.ByteBuffer;
32+
import java.util.Arrays;
33+
import java.util.zip.Adler32;
34+
import java.util.zip.CheckedInputStream;
35+
import java.util.zip.CheckedOutputStream;
36+
import java.util.zip.GZIPInputStream;
37+
import java.util.zip.GZIPOutputStream;
38+
import org.apache.jute.InputArchive;
39+
import org.apache.jute.OutputArchive;
40+
import org.apache.zookeeper.common.AtomicFileOutputStream;
41+
import org.slf4j.Logger;
42+
import org.slf4j.LoggerFactory;
43+
import org.xerial.snappy.SnappyCodec;
44+
import org.xerial.snappy.SnappyInputStream;
45+
import org.xerial.snappy.SnappyOutputStream;
46+
47+
/** Represent the Stream used in serialize and deserialize the Snapshot. */
48+
public class SnapStream {
49+
50+
private static final Logger LOG = LoggerFactory.getLogger(SnapStream.class);
51+
52+
public static final String ZOOKEEPER_SHAPSHOT_STREAM_MODE =
53+
"zookeeper.snapshot.compression.method";
54+
55+
private static StreamMode streamMode =
56+
StreamMode.fromString(
57+
System.getProperty(ZOOKEEPER_SHAPSHOT_STREAM_MODE, StreamMode.DEFAULT_MODE.getName()));
58+
59+
static {
60+
LOG.info("{} = {}", ZOOKEEPER_SHAPSHOT_STREAM_MODE, streamMode);
61+
}
62+
63+
public enum StreamMode {
64+
GZIP("gz"),
65+
SNAPPY("snappy"),
66+
CHECKED("");
67+
68+
public static final StreamMode DEFAULT_MODE = CHECKED;
69+
70+
private String name;
71+
72+
StreamMode(String name) {
73+
this.name = name;
74+
}
75+
76+
public String getName() {
77+
return name;
78+
}
79+
80+
public String getFileExtension() {
81+
return name.isEmpty() ? "" : "." + name;
82+
}
83+
84+
public static StreamMode fromString(String name) {
85+
for (StreamMode c : values()) {
86+
if (c.getName().compareToIgnoreCase(name) == 0) {
87+
return c;
88+
}
89+
}
90+
return DEFAULT_MODE;
91+
}
92+
}
93+
94+
/**
95+
* Return the CheckedInputStream based on the extension of the fileName.
96+
*
97+
* @param file the file the InputStream read from
98+
* @return the specific InputStream
99+
* @throws IOException
100+
*/
101+
public static CheckedInputStream getInputStream(File file) throws IOException {
102+
FileInputStream fis = new FileInputStream(file);
103+
InputStream is;
104+
switch (getStreamMode(file.getName())) {
105+
case GZIP:
106+
is = new GZIPInputStream(fis);
107+
break;
108+
case SNAPPY:
109+
is = new SnappyInputStream(fis);
110+
break;
111+
case CHECKED:
112+
default:
113+
is = new BufferedInputStream(fis);
114+
}
115+
return new CheckedInputStream(is, new Adler32());
116+
}
117+
118+
/**
119+
* Return the OutputStream based on predefined stream mode.
120+
*
121+
* @param file the file the OutputStream writes to
122+
* @param fsync sync the file immediately after write
123+
* @return the specific OutputStream
124+
* @throws IOException
125+
*/
126+
public static CheckedOutputStream getOutputStream(File file, boolean fsync) throws IOException {
127+
OutputStream fos = fsync ? new AtomicFileOutputStream(file) : new FileOutputStream(file);
128+
OutputStream os;
129+
switch (streamMode) {
130+
case GZIP:
131+
os = new GZIPOutputStream(fos);
132+
break;
133+
case SNAPPY:
134+
os = new SnappyOutputStream(fos);
135+
break;
136+
case CHECKED:
137+
default:
138+
os = new BufferedOutputStream(fos);
139+
}
140+
return new CheckedOutputStream(os, new Adler32());
141+
}
142+
143+
/**
144+
* Write specific seal to the OutputArchive and close the OutputStream. Currently, only
145+
* CheckedOutputStream will write it's checkSum to the end of the stream.
146+
*/
147+
public static void sealStream(CheckedOutputStream os, OutputArchive oa) throws IOException {
148+
long val = os.getChecksum().getValue();
149+
oa.writeLong(val, "val");
150+
oa.writeString("/", "path");
151+
}
152+
153+
/**
154+
* Verify the integrity of the seal, only CheckedInputStream will verify the checkSum of the
155+
* content.
156+
*/
157+
static void checkSealIntegrity(CheckedInputStream is, InputArchive ia) throws IOException {
158+
long checkSum = is.getChecksum().getValue();
159+
long val = ia.readLong("val");
160+
ia.readString("path"); // Read and ignore "/" written by SealStream.
161+
if (val != checkSum) {
162+
throw new IOException("CRC corruption");
163+
}
164+
}
165+
166+
/**
167+
* Verifies that the file is a valid snapshot. Snapshot may be invalid if it's incomplete as in a
168+
* situation when the server dies while in the process of storing a snapshot. Any files that are
169+
* improperly formated or corrupted are invalid. Any file that is not a snapshot is also an
170+
* invalid snapshot.
171+
*
172+
* @param file file to verify
173+
* @return true if the snapshot is valid
174+
* @throws IOException
175+
*/
176+
public static boolean isValidSnapshot(File file) throws IOException {
177+
if (file == null || Util.getZxidFromName(file.getName(), FileSnap.SNAPSHOT_FILE_PREFIX) == -1) {
178+
return false;
179+
}
180+
181+
boolean isValid = false;
182+
switch (getStreamMode(file.getName())) {
183+
case GZIP:
184+
isValid = isValidGZipStream(file);
185+
break;
186+
case SNAPPY:
187+
isValid = isValidSnappyStream(file);
188+
break;
189+
case CHECKED:
190+
default:
191+
isValid = isValidCheckedStream(file);
192+
}
193+
return isValid;
194+
}
195+
196+
public static void setStreamMode(StreamMode mode) {
197+
streamMode = mode;
198+
}
199+
200+
public static StreamMode getStreamMode() {
201+
return streamMode;
202+
}
203+
204+
/**
205+
* Detect the stream mode from file name extension
206+
*
207+
* @param fileName
208+
* @return
209+
*/
210+
public static StreamMode getStreamMode(String fileName) {
211+
String[] splitSnapName = fileName.split("\\.");
212+
213+
// Use file extension to detect format
214+
if (splitSnapName.length > 1) {
215+
String mode = splitSnapName[splitSnapName.length - 1];
216+
return StreamMode.fromString(mode);
217+
}
218+
219+
return StreamMode.CHECKED;
220+
}
221+
222+
/**
223+
* Certify the GZip stream integrity by checking the header for the GZip magic string
224+
*
225+
* @param f file to verify
226+
* @return true if it has the correct GZip magic string
227+
* @throws IOException
228+
*/
229+
private static boolean isValidGZipStream(File f) throws IOException {
230+
byte[] byteArray = new byte[2];
231+
try (FileInputStream fis = new FileInputStream(f)) {
232+
if (2 != fis.read(byteArray, 0, 2)) {
233+
LOG.error("Read incorrect number of bytes from {}", f.getName());
234+
return false;
235+
}
236+
ByteBuffer bb = ByteBuffer.wrap(byteArray);
237+
byte[] magicHeader = new byte[2];
238+
bb.get(magicHeader, 0, 2);
239+
int magic = magicHeader[0] & 0xff | ((magicHeader[1] << 8) & 0xff00);
240+
return magic == GZIPInputStream.GZIP_MAGIC;
241+
} catch (FileNotFoundException e) {
242+
LOG.error("Unable to open file {}", f.getName(), e);
243+
return false;
244+
}
245+
}
246+
247+
/**
248+
* Certify the Snappy stream integrity by checking the header for the Snappy magic string
249+
*
250+
* @param f file to verify
251+
* @return true if it has the correct Snappy magic string
252+
* @throws IOException
253+
*/
254+
private static boolean isValidSnappyStream(File f) throws IOException {
255+
byte[] byteArray = new byte[SnappyCodec.MAGIC_LEN];
256+
try (FileInputStream fis = new FileInputStream(f)) {
257+
if (SnappyCodec.MAGIC_LEN != fis.read(byteArray, 0, SnappyCodec.MAGIC_LEN)) {
258+
LOG.error("Read incorrect number of bytes from {}", f.getName());
259+
return false;
260+
}
261+
ByteBuffer bb = ByteBuffer.wrap(byteArray);
262+
byte[] magicHeader = new byte[SnappyCodec.MAGIC_LEN];
263+
bb.get(magicHeader, 0, SnappyCodec.MAGIC_LEN);
264+
return Arrays.equals(magicHeader, SnappyCodec.getMagicHeader());
265+
} catch (FileNotFoundException e) {
266+
LOG.error("Unable to open file {}", f.getName(), e);
267+
return false;
268+
}
269+
}
270+
271+
/**
272+
* Certify the Checked stream integrity by checking the header length and format
273+
*
274+
* @param f file to verify
275+
* @return true if it has the correct header
276+
* @throws IOException
277+
*/
278+
private static boolean isValidCheckedStream(File f) throws IOException {
279+
try (RandomAccessFile raf = new RandomAccessFile(f, "r")) {
280+
// including the header and the last / bytes
281+
// the snapshot should be at least 10 bytes
282+
if (raf.length() < 10) {
283+
return false;
284+
}
285+
286+
raf.seek(raf.length() - 5);
287+
byte[] bytes = new byte[5];
288+
int readlen = 0;
289+
int l;
290+
while (readlen < 5 && (l = raf.read(bytes, readlen, bytes.length - readlen)) >= 0) {
291+
readlen += l;
292+
}
293+
if (readlen != bytes.length) {
294+
LOG.info("Invalid snapshot {}. too short, len = {} bytes", f.getName(), readlen);
295+
return false;
296+
}
297+
ByteBuffer bb = ByteBuffer.wrap(bytes);
298+
int len = bb.getInt();
299+
byte b = bb.get();
300+
if (len != 1 || b != '/') {
301+
LOG.info("Invalid snapshot {}. len = {}, byte = {}", f.getName(), len, (b & 0xff));
302+
return false;
303+
}
304+
}
305+
306+
return true;
307+
}
308+
}

0 commit comments

Comments
 (0)