Skip to content

Commit

Permalink
重构 MultiPart 解析器 (#99)
Browse files Browse the repository at this point in the history
  • Loading branch information
scx567888 authored Oct 14, 2024
1 parent 4629cc4 commit ed6ff3f
Show file tree
Hide file tree
Showing 9 changed files with 94 additions and 48 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ public static ScxHttpHeadersWritable parseHeaders(String headersStr) {
var lines = headersStr.split("\r\n");

for (var line : lines) {
int i = line.indexOf(":");
int i = line.indexOf(':');
if (i != -1) {
var key = line.substring(0, i).trim();
var value = line.substring(i + 1).trim();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package cool.scx.http.media.multi_part;

import cool.scx.common.util.ArrayUtils;
import cool.scx.http.ScxHttpHeaders;
import cool.scx.http.ScxHttpHeadersWritable;
import cool.scx.io.InputStreamDataSupplier;
Expand All @@ -16,15 +15,12 @@ public class MultiPartStream implements MultiPart, Iterator<MultiPartPart> {

protected static final byte[] CRLF_CRLF_BYTES = "\r\n\r\n".getBytes();
protected final LinkedDataReader linkedDataReader;
protected final byte[] boundaryHeadCRLFBytes;
protected final byte[] boundaryENDBytes;
protected final byte[] boundaryBytes;
protected boolean hasNextPart;
protected String boundary;

/**
* Creates a multipart stream reader over the given input stream.
* Prepares the boundary marker bytes, wraps the stream in a LinkedDataReader,
* and immediately calls readNext() to initialise hasNextPart.
*
* NOTE(review): this listing appears to interleave removed and added diff lines
* (the three ArrayUtils.concat assignments vs. the single boundaryBytes assignment)
* - confirm against the commit which set is current.
* NOTE(review): getBytes() is called without an explicit charset - confirm the
* intended encoding of the boundary.
*
* @param inputStream source of the multipart body
* @param boundary    boundary token; "--" is prepended to it here
*/
public MultiPartStream(InputStream inputStream, String boundary) {
// "--" + boundary, built via array concatenation
var boundaryHeadBytes = ArrayUtils.concat("--".getBytes(), boundary.getBytes());
// "--" + boundary + "\r\n"
this.boundaryHeadCRLFBytes = ArrayUtils.concat(boundaryHeadBytes, "\r\n".getBytes());
// "--" + boundary + "--"
this.boundaryENDBytes = ArrayUtils.concat(boundaryHeadBytes, "--".getBytes());
// "--" + boundary, built via string concatenation
this.boundaryBytes = ("--" + boundary).getBytes();
this.linkedDataReader = new LinkedDataReader(new InputStreamDataSupplier(inputStream));
// Position the reader at the first part (if any) right away.
this.hasNextPart = readNext();
}
Expand All @@ -34,36 +30,42 @@ public ScxHttpHeadersWritable readToHeaders() {
// head /r/n
// /r/n
// content
var headersBytes = linkedDataReader.readMatch(CRLF_CRLF_BYTES);
var headersBytes = linkedDataReader.readUntil(CRLF_CRLF_BYTES);
var headersStr = new String(headersBytes);
return ScxHttpHeaders.of(headersStr);
}

/**
* Reads the content of the current part and returns it as a byte array.
* The end of the content is located by searching for the next boundary marker;
* the two bytes just before the marker are not returned and are skipped afterwards.
*
* NOTE(review): this listing appears to interleave removed and added diff lines
* (two indexOf calls in the try branch; a fallback search and a throw in the catch
* branch) - confirm against the commit which lines are current.
* NOTE(review): "i - 2" assumes the marker is preceded by \r\n; if the marker were
* found at an index below 2 the length would be negative - confirm how
* LinkedDataReader.read handles that.
*
* @return the bytes read before the boundary marker, without the trailing \r\n
*/
public byte[] readContentToByte() throws IOException {
// We need to find the end point. Assume first that this is not the last part,
// so we look for the start of the next part.
// A well-formed form always ends with --xxxxxx, so the start of the next part
// can simply be used as the end position.
try {
var i = linkedDataReader.indexOf(boundaryHeadCRLFBytes);
var i = linkedDataReader.indexOf(boundaryBytes);
// i - 2 because we do not want to read the \r\n at the end of the content
var bytes = linkedDataReader.read(i - 2);
// Skip the \r\n to make subsequent reads easier
linkedDataReader.skip(2);
return bytes;
} catch (NoMatchFoundException e) {
// This may be the last part: look for the final terminator
var i = linkedDataReader.indexOf(boundaryENDBytes);
var bytes = linkedDataReader.read(i - 2);
// Skip the \r\n to make subsequent reads easier
linkedDataReader.skip(2);
return bytes;
// In theory a well-formed MultiPart never reaches this state
throw new RuntimeException("异常状态 !!!");
}
}

public boolean readNext() {
//查找 --xxxxxxxxx\r\n 没有代表 读取到结尾
//查找 --xxxxxxxxx
try {
var i = linkedDataReader.indexOf(boundaryHeadCRLFBytes);
linkedDataReader.skip(i + boundaryHeadCRLFBytes.length);
return true;
var i = linkedDataReader.indexOf(boundaryBytes);
linkedDataReader.skip(i + boundaryBytes.length);
//向后读取两个字节
var a = linkedDataReader.read();
var b = linkedDataReader.read();
// 判断 是 \r\n or --
if (a == '\r' && b == '\n') { //还有数据
return true;
} else if (a == '-' && b == '-') { // 读取到了终结符
return false;
} else { // 理论上一个正常的 MultiPart 不会有这种情况
throw new RuntimeException("未知字符 !!! ");
}
} catch (NoMatchFoundException e) {
return false;
}
Expand Down Expand Up @@ -91,10 +93,11 @@ public MultiPartPart next() {
}
try {

var part = new MultiPartPartImpl();

// 读取当前部分的头部信息
var headers = readToHeaders();

var part = new MultiPartPartImpl().headers(headers);
part.headers(headers);

//读取内容
var content = readContentToByte();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,14 @@ public Path readContentToPath(Path path) throws IOException {
try (output) {
//我们需要查找终结点 先假设不是最后一个 那我们就需要查找下一个开始位置
try {
var i = linkedDataReader.indexOf(boundaryHeadCRLFBytes);
var i = linkedDataReader.indexOf(boundaryBytes);
// i - 2 因为我们不需要读取内容结尾的 \r\n
linkedDataReader.read(output, i - 2);
//跳过 \r\n 方便后续读取
linkedDataReader.skip(2);
} catch (NoMatchFoundException e) {
//可能是最后一个查找 最终终结点
var i = linkedDataReader.indexOf(boundaryENDBytes);
linkedDataReader.read(output, i - 2);
//跳过 \r\n 方便后续读取
linkedDataReader.skip(2);
// 理论上一个正常的 MultiPart 不会有这种情况
throw new RuntimeException("异常状态 !!!");
}
}

Expand All @@ -61,10 +58,11 @@ public MultiPartPart next() {
}
try {

var part = new MultiPartPartImpl();

// 读取当前部分的头部信息
var headers = readToHeaders();

var part = new MultiPartPartImpl().headers(headers);
part.headers(headers);

var b = needCached(headers);
if (b) {
Expand All @@ -74,6 +72,7 @@ public MultiPartPart next() {
var content = readContentToByte();
part.body(content);
}

// 检查是否有下一个部分
hasNextPart = readNext();

Expand Down
28 changes: 28 additions & 0 deletions scx-http/src/test/java/cool/scx/http/test/HeadersTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package cool.scx.http.test;

import cool.scx.http.MediaType;
import cool.scx.http.ScxHttpHeaders;
import cool.scx.http.content_type.ContentType;

import java.nio.charset.StandardCharsets;

/**
 * Rough timing harness for header handling: repeatedly builds a header set,
 * encodes it, parses the encoded form back, and prints the elapsed milliseconds.
 */
public class HeadersTest {

    public static void main(String[] args) {
        test1();
    }

    /**
     * Runs 9999 build/encode/parse round trips and prints the total time in milliseconds.
     */
    public static void test1() {
        final long startNanos = System.nanoTime();
        int round = 0;
        while (round < 9999) {
            // Build a small header set
            var headers = ScxHttpHeaders.of();
            headers.add("Content-Disposition", "form-data; name=myname");
            headers.contentLength(100);
            var jsonUtf8 = ContentType.of(MediaType.APPLICATION_JSON).charset(StandardCharsets.UTF_8);
            headers.contentType(jsonUtf8);
            // Encode, then parse the encoded form back; the parsed result is not inspected
            var encoded = headers.encode();
            var decoded = ScxHttpHeaders.of(encoded);
            round++;
        }
        // Nanoseconds -> milliseconds
        long elapsedMillis = (System.nanoTime() - startNanos) / 1000_000;
        System.out.println(elapsedMillis);
    }

}
4 changes: 4 additions & 0 deletions scx-http/src/test/java/cool/scx/http/test/MultiPartTest.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package cool.scx.http.test;

import cool.scx.common.util.ArrayUtils;
import cool.scx.http.MediaType;
import cool.scx.http.ScxHttpHeaders;
import cool.scx.http.content_type.ContentType;
Expand Down Expand Up @@ -36,6 +37,9 @@ public static void test1() {
ss.write(b);
byte[] byteArray = b.toByteArray();

//复制两遍查看是否会产生错误的读取
byteArray = ArrayUtils.concat(byteArray, byteArray);


long l = System.nanoTime();
for (int j = 0; j < 9999; j++) {
Expand Down
6 changes: 3 additions & 3 deletions scx-io/src/main/java/cool/scx/io/ByteArrayDataReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public void read(OutputStream outputStream, int maxLength) throws NoMoreDataExce
}

@Override
public byte get() throws NoMoreDataException {
public byte peek() throws NoMoreDataException {
try {
return bytes[position];
} catch (ArrayIndexOutOfBoundsException e) {
Expand All @@ -62,7 +62,7 @@ public byte get() throws NoMoreDataException {
}

@Override
public byte[] get(int maxLength) throws NoMoreDataException {
public byte[] peek(int maxLength) throws NoMoreDataException {
int availableLength = bytes.length - position;
if (availableLength <= 0) {
throw new NoMoreDataException();
Expand All @@ -74,7 +74,7 @@ public byte[] get(int maxLength) throws NoMoreDataException {
}

@Override
public void get(OutputStream outputStream, int maxLength) throws NoMoreDataException {
public void peek(OutputStream outputStream, int maxLength) throws NoMoreDataException {
int availableLength = bytes.length - position;
if (availableLength <= 0) {
throw new NoMoreDataException();
Expand Down
30 changes: 21 additions & 9 deletions scx-io/src/main/java/cool/scx/io/DataReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ public interface DataReader {
* 当没有更多的数据时会抛出异常
*
* @return byte
* @throws NoMoreDataException 没有更多数据时抛出
*/
byte read() throws NoMoreDataException;

Expand All @@ -20,6 +21,7 @@ public interface DataReader {
*
* @param maxLength 最大长度
* @return bytes
* @throws NoMoreDataException 没有更多数据时抛出
*/
byte[] read(int maxLength) throws NoMoreDataException;

Expand All @@ -28,6 +30,7 @@ public interface DataReader {
* 当没有更多的数据时会抛出异常
*
* @param maxLength 最大长度
* @throws NoMoreDataException 没有更多数据时抛出
*/
void read(OutputStream outputStream, int maxLength) throws NoMoreDataException;

Expand All @@ -36,31 +39,35 @@ public interface DataReader {
* 当没有更多的数据时会抛出异常
*
* @return byte
* @throws NoMoreDataException 没有更多数据时抛出
*/
byte get() throws NoMoreDataException;
byte peek() throws NoMoreDataException;

/**
* Reads bytes of the specified length (the pointer does not move).
* An exception is thrown when there is no more data.
*
* @param maxLength maximum length
* @return the bytes read
* @throws NoMoreDataException thrown when there is no more data
*/
byte[] get(int maxLength) throws NoMoreDataException;
byte[] peek(int maxLength) throws NoMoreDataException;

/**
* Writes bytes of the specified length to outputStream (the pointer does not move).
* An exception is thrown when there is no more data.
*
* @param outputStream destination the bytes are written to
* @param maxLength maximum length
* @throws NoMoreDataException thrown when there is no more data
*/
void get(OutputStream outputStream, int maxLength) throws NoMoreDataException;
void peek(OutputStream outputStream, int maxLength) throws NoMoreDataException;

/**
* Finds the index of the first occurrence of the given byte (the pointer does not move).
*
* NOTE(review): the original comment described the return value as "index or -1 (not found)",
* while the signature declares NoMatchFoundException for the no-match case - confirm which
* contract implementations actually follow.
*
* @param b the byte to search for
* @return index of the first occurrence
* @throws NoMatchFoundException thrown when there is no match
*/
int indexOf(byte b) throws NoMatchFoundException;

Expand All @@ -69,6 +76,7 @@ public interface DataReader {
*
* @param b 指定字节数组
* @return index 或者 -1 (未找到)
* @throws NoMatchFoundException 没有匹配时抛出
*/
int indexOf(byte[] b) throws NoMatchFoundException;

Expand All @@ -84,8 +92,9 @@ public interface DataReader {
*
* @param b 指定字节
* @return bytes
* @throws NoMatchFoundException 没有匹配时抛出
*/
default byte[] readMatch(byte b) throws NoMatchFoundException {
default byte[] readUntil(byte b) throws NoMatchFoundException {
var index = indexOf(b);
var data = read(index);
skip(1);
Expand All @@ -97,8 +106,9 @@ default byte[] readMatch(byte b) throws NoMatchFoundException {
*
* @param b 指定字节
* @return bytes
* @throws NoMatchFoundException 没有匹配时抛出
*/
default byte[] readMatch(byte[] b) throws NoMatchFoundException {
default byte[] readUntil(byte[] b) throws NoMatchFoundException {
var index = indexOf(b);
var data = read(index);
skip(b.length);
Expand All @@ -110,21 +120,23 @@ default byte[] readMatch(byte[] b) throws NoMatchFoundException {
*
* @param b 指定字节
* @return bytes
* @throws NoMatchFoundException 没有匹配时抛出
*/
default byte[] getMatch(byte b) throws NoMatchFoundException {
default byte[] peekUntil(byte b) throws NoMatchFoundException {
// Locate the first occurrence of b; indexOf is declared to throw NoMatchFoundException.
var index = indexOf(b);
// Pass that index as the length to the read variant documented as not moving the pointer.
return get(index);
return peek(index);
}

/**
* Reads until the matching bytes are found (the pointer does not move).
*
* @param b the byte sequence to search for
* @return bytes
* @throws NoMatchFoundException thrown when there is no match
*/
default byte[] getMatch(byte[] b) throws NoMatchFoundException {
default byte[] peekUntil(byte[] b) throws NoMatchFoundException {
// Locate the first occurrence of the sequence, then read that many bytes
// with the variant documented as not moving the pointer.
var index = indexOf(b);
return get(index);
return peek(index);
}

}
6 changes: 3 additions & 3 deletions scx-io/src/main/java/cool/scx/io/LinkedDataReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,13 @@ public void read(OutputStream outputStream, int maxLength) throws NoMoreDataExce
}

/**
* Returns the byte at the head node's current position.
* Nothing in this method changes head.position.
*
* @return head.bytes[head.position]
* @throws NoMoreDataException declared on the signature; presumably raised by
*                             ensureAvailable() when no data is left - TODO confirm
*/
@Override
public byte get() throws NoMoreDataException {
public byte peek() throws NoMoreDataException {
// Make sure there is data to look at before indexing into the head node.
ensureAvailable();
return head.bytes[head.position];
}

@Override
public byte[] get(int maxLength) throws NoMoreDataException {
public byte[] peek(int maxLength) throws NoMoreDataException {
ensureAvailable(); // 确保至少有一个字节可读
var result = new byte[maxLength];
var remaining = maxLength; //剩余字节数
Expand Down Expand Up @@ -169,7 +169,7 @@ public byte[] get(int maxLength) throws NoMoreDataException {
}

@Override
public void get(OutputStream outputStream, int maxLength) throws NoMoreDataException {
public void peek(OutputStream outputStream, int maxLength) throws NoMoreDataException {
ensureAvailable(); // 确保至少有一个字节可读
var result = new byte[maxLength];
var remaining = maxLength; //剩余字节数
Expand Down
6 changes: 3 additions & 3 deletions scx-io/src/test/java/cool/scx/io/test/DataReaderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,15 @@ public static void test1() {
var dataReader = new ByteArrayDataReader("11112345678".getBytes(StandardCharsets.UTF_8));

//不会影响读取
dataReader.get(99);
dataReader.peek(99);

dataReader.indexOf("1".getBytes(StandardCharsets.UTF_8));

var index = dataReader.readMatch("123".getBytes());
var index = dataReader.readUntil("123".getBytes());

try {
//第二次应该匹配失败
var index1 = dataReader.readMatch("123".getBytes());
var index1 = dataReader.readUntil("123".getBytes());
} catch (NoMatchFoundException _) {

}
Expand Down

0 comments on commit ed6ff3f

Please sign in to comment.