Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,17 @@ public class ESContentletAPIImpl implements ContentletAPI {

// Reads the CONTENT_API_SET_DEFAULT_VALUES config property (default: true) through a Lazy supplier.
private final static Lazy<Boolean> SET_DEFAULT_VALUES = Lazy.of(()-> Config.getBooleanProperty("CONTENT_API_SET_DEFAULT_VALUES", true));

/**
* Name of the configuration property that controls whether a
* FileAssetValidationException is thrown when a file asset is missing its binary field.
* When the property is set to true, a debug log message is written instead of throwing
* the exception. This can be useful during migration or bulk operations where some file
* assets may have missing binaries that need to be handled gracefully.
* Default: false (throw exception).
*/
public static final String SKIP_FILE_ASSET_BINARY_VALIDATION = "SKIP_FILE_ASSET_BINARY_VALIDATION";

// Value of the SKIP_FILE_ASSET_BINARY_VALIDATION property (default: false), read through a Lazy supplier.
// NOTE(review): presumably Lazy caches the first read, so a later config change would not be
// picked up without a restart - confirm against the Lazy implementation in use.
private static final Lazy<Boolean> SKIP_FILE_ASSET_BINARY_VALIDATION_FLAG = Lazy.of(() ->
Config.getBooleanProperty(SKIP_FILE_ASSET_BINARY_VALIDATION, false));

private final Lazy<UniqueFieldValidationStrategyResolver> uniqueFieldValidationStrategyResolver;

Expand Down Expand Up @@ -988,6 +999,18 @@ public PaginatedContentlets findContentletsPaginatedByHost(final Host parentHost
.build();
}

/**
 * Opens a scroll query after running the incoming lucene query through
 * {@code applyPermissionsToQuery}, so the factory only ever receives the
 * permission-filtered query string.
 *
 * @param luceneQuery          the base lucene query
 * @param user                 the user making the request
 * @param respectFrontendRoles whether to respect frontend roles
 * @param batchSize            batch size forwarded to the factory
 * @param sortBy               sort criteria forwarded to the factory
 * @return the scroll handle created by the content factory
 * @throws DotSecurityException propagated from the permission step or the factory
 * @throws DotDataException     propagated from the permission step or the factory
 */
@Override
public ESContentletScroll createScrollQuery(
        final String luceneQuery,
        final User user,
        final boolean respectFrontendRoles,
        final int batchSize,
        final String sortBy) throws DotSecurityException, DotDataException {

    // The raw query is never handed to the factory; only its permission-filtered form is.
    return contentFactory.createScrollQuery(
            applyPermissionsToQuery(luceneQuery, user, respectFrontendRoles),
            user,
            respectFrontendRoles,
            batchSize,
            sortBy);
}

@CloseDBIfOpened
@Override
public List<Contentlet> findContentletsByHost(Host parentHost,
Expand Down Expand Up @@ -1561,16 +1584,33 @@ public void addPermissionsToQuery(StringBuffer buffy, User user, List<Role> role

}

@Override
public List<ContentletSearch> searchIndex(String luceneQuery, int limit, int offset,
String sortBy, User user, boolean respectFrontendRoles)
throws DotSecurityException, DotDataException {
boolean isAdmin = false;
List<Role> roles = new ArrayList<>();
/**
* Applies permissions and category permissions to a lucene query string.
* This method handles admin checks, role loading, and permission clauses.
* <p>
* <strong>SECURITY NOTE:</strong> This method MUST be called on any query before
* passing it to Elasticsearch to ensure proper permission filtering.
* </p>
*
* @param luceneQuery The original lucene query string
* @param user The user making the request (required if not respecting frontend roles)
* @param respectFrontendRoles Whether to respect frontend roles
* @return The query with permissions clauses added
* @throws DotSecurityException If user is null and not respecting frontend roles
* @throws DotDataException If there's an error loading roles
*/
protected String applyPermissionsToQuery(final String luceneQuery, final User user,
final boolean respectFrontendRoles) throws DotSecurityException, DotDataException {

// Validate user requirement
if (user == null && !respectFrontendRoles) {
throw new DotSecurityException(
"You must specify a user if you are not respecting frontend roles");
}

// Check if user is admin
boolean isAdmin = false;
List<Role> roles = new ArrayList<>();
if (user != null) {
if (!APILocator.getRoleAPI()
.doesUserHaveRole(user, APILocator.getRoleAPI().loadCMSAdminRole())) {
Expand All @@ -1579,14 +1619,28 @@ public List<ContentletSearch> searchIndex(String luceneQuery, int limit, int off
isAdmin = true;
}
}
final StringBuffer buffy = new StringBuffer(luceneQuery);

// Permissions in the query
if (!isAdmin) {
addPermissionsToQuery(buffy, user, roles, respectFrontendRoles);
addCategoryPermissionsToQuery(buffy, user, roles, respectFrontendRoles);
// If admin, return query unchanged
if (isAdmin) {
return luceneQuery;
}

// Apply permissions
final StringBuffer buffy = new StringBuffer(luceneQuery);
addPermissionsToQuery(buffy, user, roles, respectFrontendRoles);
addCategoryPermissionsToQuery(buffy, user, roles, respectFrontendRoles);

return buffy.toString();
}

@Override
public List<ContentletSearch> searchIndex(String luceneQuery, int limit, int offset,
String sortBy, User user, boolean respectFrontendRoles)
throws DotSecurityException, DotDataException {

// Apply permissions to query
final String queryWithPermissions = applyPermissionsToQuery(luceneQuery, user, respectFrontendRoles);

if (UtilMethods.isSet(sortBy) && sortBy.trim().equalsIgnoreCase("random")) {
sortBy = "random";
}
Expand All @@ -1596,7 +1650,7 @@ public List<ContentletSearch> searchIndex(String luceneQuery, int limit, int off
}

if (limit <= MAX_LIMIT) {
final SearchHits searchHits = contentFactory.indexSearch(buffy.toString(), limit,
final SearchHits searchHits = contentFactory.indexSearch(queryWithPermissions, limit,
offset, sortBy);
final PaginatedArrayList<ContentletSearch> list = new PaginatedArrayList<>();
list.setTotalResults(searchHits.getTotalHits().value);
Expand All @@ -1618,7 +1672,7 @@ public List<ContentletSearch> searchIndex(String luceneQuery, int limit, int off
}
return list;
} else {
return contentFactory.indexSearchScroll(buffy.toString(), sortBy);
return contentFactory.indexSearchScroll(queryWithPermissions, sortBy);
}

}
Expand Down Expand Up @@ -5981,10 +6035,18 @@ private boolean addOrUpdateContentletIdentifier(final Contentlet contentlet,
final String binaryNode =
contentletRaw.getInode() != null ? contentletRaw.getInode()
: BLANK;
throw new FileAssetValidationException(
"Unable to validate field: " + FileAssetAPI.BINARY_FIELD
+ " identifier: " + binaryIdentifier
+ " inode: " + binaryNode);

if (SKIP_FILE_ASSET_BINARY_VALIDATION_FLAG.get()) {
Logger.debug(this,
"Missing binary field " + FileAssetAPI.BINARY_FIELD
+ " for identifier: " + binaryIdentifier
+ ", inode: " + binaryNode);
} else {
throw new FileAssetValidationException(
"Unable to validate field: " + FileAssetAPI.BINARY_FIELD
+ " identifier: " + binaryIdentifier
+ " inode: " + binaryNode);
}
} else {
//We no longer use the old BinaryField to recover the file name.
//From now on we'll recover such value from the field "fileName" presented on the screen.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package com.dotcms.content.elasticsearch.business;

import com.dotmarketing.common.model.ContentletSearch;
import com.dotmarketing.exception.DotDataException;

import java.util.List;

/**
* API for performing scroll-based queries on contentlets in ElasticSearch.
* <p>
* The Scroll API is designed for efficiently retrieving large result sets that exceed
* ElasticSearch's max_result_window limit. Unlike offset-based pagination, scroll maintains
* a search context and retrieves results in batches.
* </p>
* <p>
* The scroll context is initialized automatically upon instantiation, and the first batch
* is fetched and cached. Use {@link #nextBatch()} to retrieve batches sequentially.
* </p>
* <p>
* <strong>IMPORTANT:</strong> Always close the scroll context when done to free server resources.
* Use the try-with-resources pattern:
* </p>
* <pre>
* try (ESContentletScroll scroll = factory.createScrollQuery(query, user, false, 100)) {
* List&lt;ContentletSearch&gt; batch;
* while ((batch = scroll.nextBatch()) != null &amp;&amp; !batch.isEmpty()) {
* // process batch
* }
* }
* </pre>
*
* @see ContentletFactory#createScrollQuery(String, com.liferay.portal.model.User, boolean, int)
*/
public interface ESContentletScroll extends AutoCloseable {

/**
* Retrieves the next batch of results from the scroll context.
* On the first call, returns the initial batch fetched during construction.
* Subsequent calls fetch and return the next batch.
* Returns an empty list when there are no more results.
*
* @return List of ContentletSearch objects from the next batch (empty if no more results)
* @throws DotDataException if fetching the next batch fails
*/
List<ContentletSearch> nextBatch() throws DotDataException;

/**
* Returns the total number of hits for the query.
* Available immediately after construction.
*
* @return Total number of matching documents
*/
long getTotalHits();

/**
* Checks if there are more results available.
*
* @return true if more results are available, false otherwise
*/
boolean hasMoreResults();

/**
* Clears the scroll context and frees server resources.
* This is called automatically when using try-with-resources.
* Safe to call multiple times.
* <p>
* Redeclared without a {@code throws} clause, so unlike {@link AutoCloseable#close()}
* callers do not have to handle a checked exception when closing.
* </p>
*/
@Override
void close();
}
Original file line number Diff line number Diff line change
Expand Up @@ -189,11 +189,12 @@ private void proccessSiteDependency(final Host site) {
.forEach(fileContainer -> dependencyProcessor.addAsset(fileContainer,
PusheableAsset.CONTAINER));

PaginatedContentlets contentletsPaginatedByHost = this.contentletAPI.get().findContentletsPaginatedByHost(site,
APILocator.systemUser(), false);
// Content dependencies
tryToAddAllAndProcessDependencies(PusheableAsset.CONTENTLET, contentletsPaginatedByHost,
ManifestReason.INCLUDE_DEPENDENCY_FROM.getMessage(site));
// Content dependencies - use try-with-resources to ensure Scroll context cleanup
try (PaginatedContentlets contentletsPaginatedByHost = this.contentletAPI.get().findContentletsPaginatedByHost(site,
APILocator.systemUser(), false)) {
tryToAddAllAndProcessDependencies(PusheableAsset.CONTENTLET, contentletsPaginatedByHost,
ManifestReason.INCLUDE_DEPENDENCY_FROM.getMessage(site));
}

// Structure dependencies
tryToAddAllAndProcessDependencies(PusheableAsset.CONTENT_TYPE,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,39 @@ Contentlet findContentletByIdentifier(String identifier, long languageId, String
*/
public PaginatedContentlets findContentletsPaginatedByHost(Host parentHost, List<Integer> includingContentTypes, List<Integer> excludingContentTypes, User user, boolean respectFrontendRoles) throws DotDataException, DotSecurityException;

/**
* Creates an ElasticSearch Scroll API query with proper permissions applied.
* <p>
* This method should be used instead of directly accessing the factory when you need
* scroll functionality for large result sets. It ensures that permissions are properly
* applied to the query before creating the scroll context.
* </p>
* <p>
* <strong>IMPORTANT:</strong> Always use try-with-resources to ensure the scroll
* context is properly cleaned up:
* </p>
* <pre>
* try (ESContentletScroll scroll = contentletAPI.createScrollQuery(query, user, false, 100, "title asc")) {
* List&lt;ContentletSearch&gt; batch;
* while ((batch = scroll.nextBatch()) != null &amp;&amp; !batch.isEmpty()) {
* // process batch
* }
* }
* </pre>
*
* @param luceneQuery The base lucene query (permissions will be added automatically)
* @param user The user making the request (required if not respecting frontend roles)
* @param respectFrontendRoles Whether to respect frontend roles
* @param batchSize Number of results to retrieve per batch
* @param sortBy Sort criteria (e.g., "title asc", "moddate desc")
* @return ESContentletScroll instance for iterating through results; the caller is
* responsible for closing it
* @throws DotSecurityException If user is null and not respecting frontend roles
* @throws DotDataException If there's an error creating the scroll query
*/
public com.dotcms.content.elasticsearch.business.ESContentletScroll createScrollQuery(
String luceneQuery, User user, boolean respectFrontendRoles, int batchSize, String sortBy)
throws DotSecurityException, DotDataException;

/**
*
* Returns a list of {@link Contentlet} whose parent host matches the given host and whose base-type
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.dotmarketing.portlets.contentlet.business;

import com.dotcms.business.CloseDBIfOpened;
import com.dotcms.content.elasticsearch.business.ESContentletScroll;
import com.dotcms.content.elasticsearch.business.ESSearchResults;
import com.dotcms.content.elasticsearch.business.SearchCriteria;
import com.dotcms.contenttype.model.type.ContentType;
Expand Down Expand Up @@ -1190,6 +1191,29 @@ public PaginatedContentlets findContentletsPaginatedByHost(
return paginatedContentletsByHost;
}

/**
 * Interceptor entry point for {@code createScrollQuery}: consults every pre-hook,
 * delegates to the wrapped API, then notifies every post-hook with the result.
 *
 * @param luceneQuery          the lucene query string
 * @param user                 the user executing the query
 * @param respectFrontendRoles whether to respect frontend roles
 * @param batchSize            the size of each batch returned by the scroll
 * @param sortBy               the sort criteria
 * @return the scroll handle produced by the wrapped API
 * @throws DotRuntimeException  if any pre-hook returns {@code false}
 * @throws DotSecurityException propagated from the wrapped API
 * @throws DotDataException     propagated from the wrapped API
 */
@Override
public ESContentletScroll createScrollQuery(
        final String luceneQuery,
        final com.liferay.portal.model.User user,
        final boolean respectFrontendRoles,
        final int batchSize,
        final String sortBy) throws DotSecurityException, DotDataException {

    for (final ContentletAPIPreHook preHook : preHooks) {
        if (preHook.createScrollQuery(luceneQuery, user, respectFrontendRoles, batchSize, sortBy)) {
            continue;
        }
        // A pre-hook vetoed the call: log which one and abort before touching the API.
        final String failure = String.format(PREHOOK_FAILED_MESSAGE, preHook.getClass().getName());
        Logger.error(this, failure);
        throw new DotRuntimeException(failure);
    }

    final ESContentletScroll scroll =
            conAPI.createScrollQuery(luceneQuery, user, respectFrontendRoles, batchSize, sortBy);

    for (final ContentletAPIPostHook postHook : postHooks) {
        postHook.createScrollQuery(luceneQuery, user, respectFrontendRoles, batchSize, sortBy, scroll);
    }

    return scroll;
}


@Override
public List<Contentlet> findContentletsByHostBaseType(Host parentHost,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.dotmarketing.portlets.contentlet.business;

import com.dotcms.content.elasticsearch.business.ESContentletScroll;
import com.dotcms.content.elasticsearch.business.SearchCriteria;
import com.dotcms.contenttype.model.type.ContentType;
import com.dotcms.variant.model.Variant;
Expand Down Expand Up @@ -1839,4 +1840,16 @@ default void saveContentOnVariant(Contentlet contentlet, String variantName, Use
}

default void findContentletByIdentifierOrFallback(String identifier, boolean live, long incomingLangId, User user, boolean respectFrontendRoles, String variantName) {}

/**
 * Post-hook callback for {@code createScrollQuery}, receiving the original call
 * arguments together with the scroll object the API returned.
 * The default implementation does nothing.
 *
 * @param luceneQuery          the lucene query string passed to the API
 * @param user                 the user executing the query
 * @param respectFrontendRoles whether frontend roles are respected
 * @param batchSize            the size of each batch returned by the scroll
 * @param sortBy               the sort criteria
 * @param returnValue          the ESContentletScroll object returned by the API
 */
default void createScrollQuery(
        String luceneQuery,
        User user,
        boolean respectFrontendRoles,
        int batchSize,
        String sortBy,
        ESContentletScroll returnValue) {
    // Intentionally empty: implementors override only when they need to react.
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2144,4 +2144,18 @@ default boolean saveContentOnVariant(Contentlet contentlet, String variantName,
default boolean findContentletByIdentifierOrFallback(String identifier, boolean live, long incomingLangId, User user, boolean respectFrontendRoles, String variantName) {
return true;
}

/**
 * Pre-hook callback for {@code createScrollQuery}, receiving the arguments of the
 * pending call. The default implementation lets the call proceed.
 *
 * @param luceneQuery          the lucene query string
 * @param user                 the user executing the query
 * @param respectFrontendRoles whether to respect frontend roles
 * @param batchSize            the size of each batch returned by the scroll
 * @param sortBy               the sort criteria
 * @return {@code false} if the hook should stop the transaction, {@code true} otherwise
 */
default boolean createScrollQuery(
        String luceneQuery,
        User user,
        boolean respectFrontendRoles,
        int batchSize,
        String sortBy) {
    // Default: never veto.
    return true;
}
}
Loading
Loading