Skip to content

Commit

Permalink
Issue 22779 improve db metadata (#22834)
Browse files Browse the repository at this point in the history
* #22779 adding first changes

* #22779 the file can holds a binary buffer or a file

* #22779 adding more changes

* #22779 just adding a comment for future

* #22779 adding a normalization of the file repo manager, now it is grouping on directories

* #22779 minor changes to handle the hashes on folders

* #22779 adding more unit test

* #22779 adding more unit test

* #22779 adding a fix for an unit test

* #22779 adding more doc

---------

Co-authored-by: fabrizzio-dotCMS <fabrizzio@dotCMS.com>
  • Loading branch information
jdotcms and fabrizzio-dotCMS authored Jan 30, 2023
1 parent 6b2a9be commit 6590a84
Show file tree
Hide file tree
Showing 13 changed files with 535 additions and 100 deletions.
2 changes: 2 additions & 0 deletions dotCMS/src/integration-test/java/com/dotcms/MainSuite.java
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@
import com.dotcms.security.multipart.SecureFileValidatorTest;
import com.dotcms.storage.FileMetadataAPITest;
import com.dotcms.storage.StoragePersistenceAPITest;
import com.dotcms.storage.repository.HashedLocalFileRepositoryManagerTest;
import com.dotcms.translate.GoogleTranslationServiceIntegrationTest;
import com.dotcms.uuid.shorty.LegacyShortyIdApiTest;
import com.dotcms.variant.VariantAPITest;
Expand Down Expand Up @@ -631,6 +632,7 @@
StoryBlockAPITest.class,
UtilMethodsITest.class,
Task220912UpdateCorrectShowOnMenuPropertyTest.class,
HashedLocalFileRepositoryManagerTest.class,
PersonaActionletTest.class,
SendRedirectActionletTest.class,
SetRequestAttributeActionletTest.class,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package com.dotcms.storage;

import com.dotcms.util.IntegrationTestInitService;
import com.dotmarketing.util.FileUtil;
import com.google.common.io.Files;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

public class BinaryBlockFilJoinerImplTest {


@BeforeClass
public static void prepare() throws Exception {
IntegrationTestInitService.getInstance().init();
}

/**
* Method to test: {@link BinaryBlockFileJoinerImpl#join(byte[], int, int)}
* Given Scenario: Adds less than 1000 bytes
* ExpectedResult: should return the binary and not a file
*
*/
@Test
public void Test_Easy_Path() throws IOException {

final File file = FileUtil.createTemporaryFile("dot-db-storage-recovery", ".tmp", true);
final BinaryBlockFileJoinerImpl binaryBlockFileJoiner = new BinaryBlockFileJoinerImpl(file, 1000);

final String s1 = "one,two,three,four,five";
binaryBlockFileJoiner.join(s1.getBytes(StandardCharsets.UTF_8), 0, s1.length());

final String s2 = "six,seven,eight,nine,ten";
binaryBlockFileJoiner.join(s2.getBytes(StandardCharsets.UTF_8), 0, s2.length());

final byte [] bytes = binaryBlockFileJoiner.getBytes();
Assert.assertNotNull(bytes);
Assert.assertEquals(new String(bytes,StandardCharsets.UTF_8).trim(),s1+s2);
}

/**
* Method to test: {@link BinaryBlockFileJoinerImpl#join(byte[], int, int)}
* Given Scenario: Adds less than 1000 bytes
* ExpectedResult: should return the binary and not a file
*
*/
@Test
public void Test_Flush() throws IOException {

final File file = FileUtil.createTemporaryFile("dot-db-storage-recovery", ".tmp", true);
final BinaryBlockFileJoinerImpl binaryBlockFileJoiner = new BinaryBlockFileJoinerImpl(file, 1000);
final StringBuilder builder = new StringBuilder();
for (int i=0;i<100;++i) {
final String s1 = "one,two,three,four,five,six,seven,eight,nine,ten,one,two,three,four,five,six,seven,eight,nine,ten,one,two,three,four," +
"five,six,seven,eight,nine,ten,one,two,three,four,five,six,seven,eight,nine,ten" + "five,six,seven,eight,nine,ten,one,two,three,four,five,six,seven,eight,nine,ten" +
"five,six,seven,eight,nine,ten,one,two,three,four,five,six,seven,eight,nine,ten";
builder.append(s1);
binaryBlockFileJoiner.join(s1.getBytes(StandardCharsets.UTF_8), 0, s1.length());
}

final byte [] bytes = binaryBlockFileJoiner.getBytes(); //
Assert.assertNull(bytes);

binaryBlockFileJoiner.flush();
binaryBlockFileJoiner.close();

Assert.assertTrue(file.exists());
final String fileContent = Files.asCharSource(file, StandardCharsets.UTF_8).read();

Assert.assertEquals(fileContent, builder.toString());
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import com.dotcms.datagen.TestDataUtils.TestFile;
import com.dotcms.storage.StoragePersistenceProvider.INSTANCE;
import com.dotcms.storage.repository.BinaryFileWrapper;
import com.dotcms.util.ConfigTestHelper;
import com.dotcms.util.IntegrationTestInitService;
import com.dotmarketing.exception.DotDataException;
Expand Down Expand Up @@ -144,7 +145,8 @@ public void Test_Storage_Push_File_Then_Recover_Then_Remove_Group(final TestCase
assertTrue(pullFile.canRead());
assertTrue(pullFile.canWrite());
assertNotSame(pushFile, pullFile);
assertEquals(pullFile.length(), pushFile.length());
final long pullFileLength = pullFile instanceof BinaryFileWrapper? BinaryFileWrapper.class.cast(pullFile).getBufferByte().length: pullFile.length();
assertEquals(pullFileLength, pushFile.length());
} finally {
count = storage.deleteGroup(groupName);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package com.dotcms.storage.repository;

import com.dotcms.IntegrationTestBase;
import com.dotmarketing.util.FileUtil;
import com.dotmarketing.util.UUIDUtil;
import io.vavr.control.Try;
import org.apache.commons.io.FileUtils;
import org.junit.Assert;
import org.junit.Test;

import java.io.File;
import java.nio.charset.StandardCharsets;

public class HashedLocalFileRepositoryManagerTest extends IntegrationTestBase {

/**
* Method to test: {@link HashedLocalFileRepositoryManager#normalizeFileInSubDirectoryAndFile(String)}
* Given Scenario: Test diff scenarios for the method
* ExpectedResult: The results perform based on the expected invariant
*
*/
@Test
public void test_normalizeFileInSubDirectoryAndFile() {

final HashedLocalFileRepositoryManager hashedLocalFileRepositoryManager = new HashedLocalFileRepositoryManager("./");

Assert.assertNull(hashedLocalFileRepositoryManager.normalizeFileInSubDirectoryAndFile(null));
final String shortHash1 = "1";
Assert.assertEquals(shortHash1, hashedLocalFileRepositoryManager.normalizeFileInSubDirectoryAndFile(shortHash1));
final String shortHash2 = "12";
Assert.assertEquals(shortHash2, hashedLocalFileRepositoryManager.normalizeFileInSubDirectoryAndFile(shortHash2));
final String shortHash3 = "1234";
Assert.assertEquals(shortHash3, hashedLocalFileRepositoryManager.normalizeFileInSubDirectoryAndFile(shortHash3));
final String hash = "f813463c714e009df1227f706e290e01";
Assert.assertEquals("/f813/" + hash , hashedLocalFileRepositoryManager.normalizeFileInSubDirectoryAndFile(hash));
}

/**
* Method to test: {@link HashedLocalFileRepositoryManager#exists(String)} (String)} {@link HashedLocalFileRepositoryManager#getOrCreateFile(String)}
* Given Scenario: Test with non exiting hash, then get or create it, then checks it exists and retrieve again
* ExpectedResult: The methods should return the right information based on the scenarios
*
*/
@Test
public void test_GetOrCreate_and_Exits_() {

final String basePath = "./" + UUIDUtil.uuid();

try {
final HashedLocalFileRepositoryManager hashedLocalFileRepositoryManager = new HashedLocalFileRepositoryManager(basePath);
final String hash = "f813463c714e009df1227f706e290e01";
Assert.assertFalse(hashedLocalFileRepositoryManager.exists(hash));
final File file = hashedLocalFileRepositoryManager.getOrCreateFile(hash);
Assert.assertNotNull(file);
Assert.assertFalse(file.exists());
Try.run(()->FileUtils.write(file, "2" +
"test", StandardCharsets.UTF_8));
Assert.assertTrue(hashedLocalFileRepositoryManager.exists(hash));
Assert.assertEquals(hash, file.getName());
Assert.assertEquals("f813", file.getParentFile().getName());


final File file2 = hashedLocalFileRepositoryManager.getOrCreateFile(hash);
Assert.assertNotNull(file2);
Assert.assertTrue(file2.exists());
Assert.assertEquals(hash, file.getName());
Assert.assertEquals("f813", file.getParentFile().getName());

} finally {

Try.run(()->FileUtil.deleteDir(basePath));
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package com.dotcms.storage;

import com.dotcms.util.FileJoiner;
import com.dotcms.util.FileJoinerImpl;
import org.apache.commons.io.output.DeferredFileOutputStream;

import java.io.File;

/**
* Join into the bytes into a memory until reached the max size, switch to a file
* {@link BinaryBlockFileJoinerImpl#getBytes()} is not null when the file contents is on memory, otherwise is null and the content is into the file.
* @author jsanca
*/
public class BinaryBlockFileJoinerImpl extends FileJoinerImpl implements FileJoiner {


public BinaryBlockFileJoinerImpl(final File file, final int maxSize) {
super(new DeferredFileOutputStream(maxSize, file));
}

public byte [] getBytes() {

final DeferredFileOutputStream outputStream = (DeferredFileOutputStream)this.getOutputStream();
return outputStream.isInMemory()?outputStream.getData():null;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,15 @@
import static com.dotcms.storage.model.BasicMetadataFields.SHA256_META_KEY;

import com.dotcms.concurrent.DotConcurrentFactory;
import com.dotcms.storage.repository.BinaryFileWrapper;
import com.dotcms.storage.repository.FileRepositoryManager;
import com.dotcms.storage.repository.HashedLocalFileRepositoryManager;
import com.dotcms.storage.repository.LocalFileRepositoryManager;
import com.dotcms.storage.repository.TempFileRepositoryManager;
import com.dotcms.util.CloseUtils;
import com.dotcms.util.CollectionsUtils;
import com.dotcms.util.FileByteSplitter;
import com.dotcms.util.FileJoiner;
import com.dotcms.util.FileJoinerImpl;
import com.dotcms.util.ReturnableDelegate;
import com.dotmarketing.business.APILocator;
import com.dotmarketing.common.db.DotConnect;
Expand All @@ -25,10 +30,11 @@
import com.liferay.util.Encryptor;
import com.liferay.util.Encryptor.Hashing;
import com.liferay.util.HashBuilder;
import com.liferay.util.PropertiesUtil;
import com.liferay.util.StringPool;
import io.vavr.Tuple2;
import io.vavr.control.Try;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
Expand All @@ -41,7 +47,6 @@
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Collections;
import java.util.Date;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
Expand All @@ -50,13 +55,15 @@
import java.util.concurrent.Future;
import java.util.function.Supplier;
import java.util.stream.Collectors;

import org.apache.commons.io.output.DeferredFileOutputStream;
import org.apache.commons.lang3.BooleanUtils;
import org.apache.commons.lang3.mutable.MutableBoolean;
import org.apache.commons.lang3.mutable.MutableObject;

/**
* Represents a Storage on the database It supports big files since provides the ability to split
* fat object in smaller pieces, see {@link FileByteSplitter} and {@link com.dotcms.util.FileJoiner}
* fat object in smaller pieces, see {@link FileByteSplitter} and {@link FileJoinerImpl}
* to get more details about the process
*
* @author jsanca
Expand All @@ -65,6 +72,25 @@ public class DataBaseStoragePersistenceAPIImpl implements StoragePersistenceAPI

private static final String DATABASE_STORAGE_JDBC_POOL_NAME = "DATABASE_STORAGE_JDBC_POOL_NAME";
private static final String DB_STORAGE_CHUNK_SIZE = "DB_STORAGE_CHUNK_SIZE";
private final FileRepositoryManager fileRepositoryManager = this.getFileRepository();
private static final int DATABASE_FILE_BUFFER_SIZE = Config.getIntProperty("DATABASE_FILE_BUFFER_SIZE", 2000);

private static final String DATA_BASE_STORAGE_FILE_REPO_TYPE = Config.getStringProperty(
"DATA_BASE_STORAGE_FILE_REPO_TYPE", FileRepositoryManager.TEMP_REPO).toUpperCase();

private FileRepositoryManager getFileRepository() {

switch (DATA_BASE_STORAGE_FILE_REPO_TYPE) {

case FileRepositoryManager.HASH_LOCAL_REPO:
return new HashedLocalFileRepositoryManager();
case FileRepositoryManager.LOCAL_REPO:
return new LocalFileRepositoryManager();

}

return new TempFileRepositoryManager();
}

/**
* custom external connection provider method in case we want to store stuff outside our db
Expand Down Expand Up @@ -695,8 +721,15 @@ public File pullFile(final String groupName, final String path) throws DotDataEx
}

private File createJoinFile(final String hashId, final Connection connection) throws IOException, DotDataException {
final File file = FileUtil.createTemporaryFile("dot-db-storage-recovery", ".tmp", true);
try (final FileJoiner fileJoiner = new FileJoiner(file)) {

if (this.fileRepositoryManager.exists(hashId)) {

return this.fileRepositoryManager.getOrCreateFile(hashId);
}

File file = this.fileRepositoryManager.getOrCreateFile(hashId);
final DeferredFileOutputStream deferredFileOutputStream = null;
try (final BinaryBlockFileJoinerImpl fileJoiner = new BinaryBlockFileJoinerImpl(file, DATABASE_FILE_BUFFER_SIZE)) {
final HashBuilder fileHashBuilder = Try.of(Hashing::sha256)
.getOrElseThrow(DotRuntimeException::new);

Expand Down Expand Up @@ -727,6 +760,12 @@ private File createJoinFile(final String hashId, final Connection connection) th
"The file hash `%s` isn't valid. it doesn't match the records in `storage_data/storage_x_data` or they don't exist. ",
hashId));
}

// this means file is buffered and needs to wrap the binary
if (fileJoiner.getBytes() != null) {

file = new BinaryFileWrapper(file, fileJoiner.getBytes());
}
} catch (Exception e) {
throw new DotDataException(e.getMessage(), e);
}
Expand All @@ -742,12 +781,16 @@ public Object pullObject(final String groupName, final String path,
final File file = pullFile(groupName, path);

if (null != file) {
object = Try.of(() -> {
try (InputStream inputStream = Files.newInputStream(file.toPath())) {
return readerDelegate.read(inputStream);
}
}
).getOrNull();
object =
Try.of(() -> {
try (InputStream inputStream = file instanceof BinaryFileWrapper?
new ByteArrayInputStream(BinaryFileWrapper.class.cast(file).getBufferByte()):
Files.newInputStream(file.toPath())) {

return readerDelegate.read(inputStream);
}
}
).getOrNull();
}

return object;
Expand Down Expand Up @@ -1042,4 +1085,4 @@ void pushHashReference(final Connection connection, final String objectHash, fin
}


}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package com.dotcms.storage.repository;

import java.io.File;

/**
* This implementation of a File, make the File object as state pattern
* - one state is the default File implementation
* - the other state is a chunk of bytes
*
* This pattern allows to cache in memory, the small files; so upper layers may decide if reads the whole file from the file system
* or just get the mem buffer, this avoids cpu cycles and I/O
* @author jsanca
*/
public class BinaryFileWrapper extends File {

private final byte[] bufferByte;
public BinaryFileWrapper(final File file, final byte[] bufferByte) {
super(file.getPath());
this.bufferByte = bufferByte;
}

public byte[] getBufferByte() {
return bufferByte;
}
}
Loading

0 comments on commit 6590a84

Please sign in to comment.