Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.util.Map;
import java.util.Set;

import org.apache.commons.lang3.tuple.Pair;
import org.apache.drill.common.config.DrillConfig;
import org.apache.drill.common.util.DrillStringUtils;
import org.apache.drill.common.util.FileUtils;
Expand Down Expand Up @@ -242,14 +243,14 @@ public Class<?> getImplementationClass(
final byte[][] implementationClasses = classLoader.getClassByteCode(set.generated, entireClass);

long totalBytecodeSize = 0;
Map<String, ClassNode> classesToMerge = Maps.newHashMap();
Map<String, Pair<byte[], ClassNode>> classesToMerge = Maps.newHashMap();
for (byte[] clazz : implementationClasses) {
totalBytecodeSize += clazz.length;
final ClassNode node = AsmUtil.classFromBytes(clazz, ClassReader.EXPAND_FRAMES);
if (!AsmUtil.isClassOk(logger, "implementationClasses", node)) {
throw new IllegalStateException("Problem found with implementationClasses");
}
classesToMerge.put(node.name, node);
classesToMerge.put(node.name, Pair.of(clazz, node));
}

final LinkedList<ClassSet> names = Lists.newLinkedList();
Expand All @@ -264,7 +265,14 @@ public Class<?> getImplementationClass(
final ClassNames nextPrecompiled = nextSet.precompiled;
final byte[] precompiledBytes = byteCodeLoader.getClassByteCodeFromPath(nextPrecompiled.clazz);
final ClassNames nextGenerated = nextSet.generated;
final ClassNode generatedNode = classesToMerge.get(nextGenerated.slash);
// keeps only classes that have not be merged
Pair<byte[], ClassNode> classNodePair = classesToMerge.remove(nextGenerated.slash);
final ClassNode generatedNode;
if (classNodePair != null) {
generatedNode = classNodePair.getValue();
} else {
generatedNode = null;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does it mean by "generatedNode = null"? Will it cause problem in subsequent call at line 288?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is needed for the case when generated class was empty. Then classesToMerge would be empty too and classesToMerge.remove(nextGenerated.slash) would return null.
No, it wouldn't. In the method MergeAdapter.getMergedClass() there are a many checks for this value.

}

/*
* TODO
Expand Down Expand Up @@ -309,6 +317,10 @@ public Class<?> getImplementationClass(
namesCompleted.add(nextSet);
}

// adds byte code of the classes that have not been merged to make them accessible for outer class
for (Map.Entry<String, Pair<byte[], ClassNode>> clazz : classesToMerge.entrySet()) {
classLoader.injectByteCode(clazz.getKey().replace(FileUtils.separatorChar, '.'), clazz.getValue().getKey());
}
Class<?> c = classLoader.findClass(set.generated.dot);
if (templateDefinition.getExternalInterface().isAssignableFrom(c)) {
logger.debug("Compiled and merged {}: bytecode size = {}, time = {} ms.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,12 @@

import java.lang.reflect.Constructor;
import java.lang.reflect.Modifier;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;

import org.apache.drill.common.exceptions.DrillRuntimeException;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.common.types.TypeProtos.DataMode;
Expand Down Expand Up @@ -59,14 +61,16 @@
import com.sun.codemodel.JType;
import com.sun.codemodel.JVar;
import org.apache.drill.exec.server.options.OptionSet;
import org.objectweb.asm.Label;

public class ClassGenerator<T>{

public static final GeneratorMapping DEFAULT_SCALAR_MAP = GM("doSetup", "doEval", null, null);
public static final GeneratorMapping DEFAULT_CONSTANT_MAP = GM("doSetup", "doSetup", null, null);

static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ClassGenerator.class);
public static enum BlockType {SETUP, EVAL, RESET, CLEANUP};

public enum BlockType {SETUP, EVAL, RESET, CLEANUP}

private final SignatureHolder sig;
private final EvaluationVisitor evaluationVisitor;
Expand All @@ -77,10 +81,43 @@ public static enum BlockType {SETUP, EVAL, RESET, CLEANUP};
private final CodeGenerator<T> codeGenerator;

public final JDefinedClass clazz;
private final LinkedList<SizedJBlock>[] blocks;

private final JCodeModel model;
private final OptionSet optionManager;

private ClassGenerator<T> innerClassGenerator;
private LinkedList<SizedJBlock>[] blocks;
private LinkedList<SizedJBlock>[] oldBlocks;

/**
* Assumed that field has 3 indexes within the constant pull: index of the CONSTANT_Fieldref_info +
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not entirely sure the calculation is correct, in terms of # of entries per field in constant pool of a class.

Per JVM spec, each class field has CONSTANT_Fieldref_info (1 entry), which has class_index and name_and_type_index. The class_index points CONSTANT_Class_info, which is shared by across all the class fields. The second points to CONSTANT_NameAndType_info (1 entry), which points to name (1 entry) and descriptor (1 entry). Therefore, for each class field, at least 4 entries are required in constant pool. Similarly, we could get 4 entries for each method.

Besides fields and methods, we also have to take constant literal into account, like int, float , string ... constant. For constant literals, since we apply source-code copy for build-in-function /udf, it's hard to figure out exactly how many constants are used in the generated class.

Given the above reasons, I'm not sure whether it makes sense to try to come up with a formula to estimate the maximum # of fields a generated class could have. If the estimation is not accurate, then what if we just provides a ballpark estimation and put some 'magic' number here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this calculation is taken into account that CONSTANT_NameAndType_info.descriptor has limited range of its values, so it was taken 3 entries for the each class field and method.

In this formula supposed that each class field and local variable use different literal values that have two entries. I am agree with you that there may be cases that have not been covered by this formula.

The formula is needed for at least to consider the number of generated methods, difference between entries count for class fields and local variables. The 'magic' number 1000 was used in this formula to reserve constant pool for class references and unaccounted cases.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although the proposed formula might still hit problem in certain cases, it seems to work fine for normal cases. Also, for normal queries over hundreds of columns, the run-time generated code remains same as before. Given that, it makes sense to me to merge this code change.

* CONSTANT_Fieldref_info.name_and_type_index + CONSTANT_NameAndType_info.name_index.
* CONSTANT_NameAndType_info.descriptor_index has limited range of values, CONSTANT_Fieldref_info.class_index is
* the same for a single class, they will be taken into account later.
* <p>
* Local variable has 1 index within the constant pool.
* {@link org.objectweb.asm.MethodWriter#visitLocalVariable(String, String, String, Label, Label, int)}
* <p>
* For upper estimation of max index value, suppose that each field and local variable uses different literal
* values that have two indexes, then the number of occupied indexes within the constant pull is
* fieldCount * 3 + fieldCount * 2 + (index - fieldCount) * 3 => fieldCount * 2 + index * 3
* <p>
* Assumed that method has 3 indexes within the constant pull: index of the CONSTANT_Methodref_info +
* CONSTANT_Methodref_info.name_and_type_index + CONSTANT_NameAndType_info.name_index.
* <p>
* For the upper estimation of number of split methods suppose that each expression in the method uses single variable.
* Suppose that the max number of indexes within the constant pull occupied by fields and local variables is M,
* the number of split methods is N, number of abstract methods in the template is A, then splitted methods count is
* N = (M - A * N * 3) / 50 => N = M / (50 + A * 3)
* <p>
* Additionally should be taken into account class references; fields and methods from the template,
* so reserves 1000 for them.
* <p>
* Then the size of the occupied part in the constant pull is
* (fieldCount * 2 + index * 3 + 1000) * (1 + 3 / (50 + A * 3))
*/
private long maxIndex;

private int index = 0;
private int labelIndex = 0;
private MappingSet mappings;
Expand Down Expand Up @@ -123,6 +160,8 @@ public static MappingSet getDefaultMapping() {
JDefinedClass innerClazz = clazz._class(mods, innerClassName);
innerClasses.put(innerClassName, new ClassGenerator<>(codeGenerator, mappingSet, child, eval, innerClazz, model, optionManager));
}
long maxExprsNumber = optionManager != null ? optionManager.getOption(ExecConstants.CODE_GEN_EXP_IN_METHOD_SIZE_VALIDATOR) : 50;
maxIndex = Math.round((0xFFFF / (1 + 3. / (3 * sig.size() + maxExprsNumber)) - 1000) / 3);
}

public ClassGenerator<T> getInnerGenerator(String name) {
Expand All @@ -136,6 +175,9 @@ public MappingSet getMappingSet() {
}

public void setMappingSet(MappingSet mappings) {
if (innerClassGenerator != null) {
innerClassGenerator.setMappingSet(mappings);
}
this.mappings = mappings;
}

Expand Down Expand Up @@ -210,7 +252,19 @@ public JVar declareVectorValueSetupAndMember(String batchName, TypedFieldId fiel
return declareVectorValueSetupAndMember(DirectExpression.direct(batchName), fieldId);
}

/**
* Creates class variable for the value vector using metadata from {@code fieldId}
* and initializes it using setup blocks.
*
* @param batchName expression for invoking {@code getValueAccessorById} method
* @param fieldId metadata of the field that should be declared
* @return a newly generated class field
*/
public JVar declareVectorValueSetupAndMember(DirectExpression batchName, TypedFieldId fieldId) {
// declares field in the inner class if innerClassGenerator has been created
if (innerClassGenerator != null) {
return innerClassGenerator.declareVectorValueSetupAndMember(batchName, fieldId);
}
final ValueVectorSetup setup = new ValueVectorSetup(batchName, fieldId);
// JVar var = this.vvDeclaration.get(setup);
// if(var != null) return var;
Expand Down Expand Up @@ -286,6 +340,65 @@ public void rotateBlock() {
rotateBlock(BlkCreateMode.TRUE);
}

/**
* Assigns {@link #blocks} from the last nested {@link #innerClassGenerator} to {@link this#blocks}
* recursively if {@link #innerClassGenerator} has been created.
*/
private void setupValidBlocks() {
if (createNestedClass()) {
// blocks from the last inner class should be used
setupInnerClassBlocks();
}
}

/**
* Creates {@link #innerClassGenerator} with inner class
* if {@link #hasMaxIndexValue()} returns {@code true}.
*
* @return true if splitting happened.
*/
private boolean createNestedClass() {
if (hasMaxIndexValue()) {
// all new fields will be declared in the class from innerClassGenerator
if (innerClassGenerator == null) {
try {
JDefinedClass innerClazz = clazz._class(JMod.PRIVATE, clazz.name() + "0");
innerClassGenerator = new ClassGenerator<>(codeGenerator, mappings, sig, evaluationVisitor, innerClazz, model, optionManager);
} catch (JClassAlreadyExistsException e) {
throw new DrillRuntimeException(e);
}
oldBlocks = blocks;
innerClassGenerator.index = index;
innerClassGenerator.maxIndex += index;
// blocks from the inner class should be used
setupInnerClassBlocks();
return true;
}
return innerClassGenerator.createNestedClass();
}
return false;
}

/**
* Checks that {@link #index} has reached its max value.
*
* @return true if {@code index + clazz.fields().size() * 2 / 3} is greater than {@code maxIndex}
*/
private boolean hasMaxIndexValue() {
return index + clazz.fields().size() * 2 / 3 > maxIndex;
}

/**
* Gets blocks from the last inner {@link ClassGenerator innerClassGenerator}
* and assigns it to the {@link this#blocks} recursively.
*/
private void setupInnerClassBlocks() {
if (innerClassGenerator != null) {
innerClassGenerator.setupInnerClassBlocks();
blocks = innerClassGenerator.blocks;
}
}

/**
* Create a new code block, closing the current block.
*
Expand All @@ -306,17 +419,27 @@ private void rotateBlock(BlkCreateMode mode) {
}
if (blockRotated) {
evaluationVisitor.previousExpressions.clear();
setupValidBlocks();
}
}

/**
* Creates methods from the signature {@code sig} with body from the appropriate {@code blocks}.
*/
void flushCode() {
JVar innerClassField = null;
if (innerClassGenerator != null) {
blocks = oldBlocks;
innerClassField = clazz.field(JMod.NONE, model.ref(innerClassGenerator.clazz.name()), "innerClassField");
innerClassGenerator.flushCode();
}
int i = 0;
for(CodeGeneratorMethod method : sig) {
for (CodeGeneratorMethod method : sig) {
JMethod outer = clazz.method(JMod.PUBLIC, model._ref(method.getReturnType()), method.getMethodName());
for(CodeGeneratorArgument arg : method) {
for (CodeGeneratorArgument arg : method) {
outer.param(arg.getType(), arg.getName());
}
for(Class<?> c : method.getThrowsIterable()) {
for (Class<?> c : method.getThrowsIterable()) {
outer._throws(model.ref(c));
}
outer._throws(SchemaChangeException.class);
Expand Down Expand Up @@ -353,6 +476,38 @@ void flushCode() {
exprsInMethod += sb.getCount();
}
}
if (innerClassField != null) {
// creates inner class instance and initializes innerClassField
if (method.getMethodName().equals("__DRILL_INIT__")) {
JInvocation rhs = JExpr._new(innerClassGenerator.clazz);
JBlock block = new JBlock().assign(innerClassField, rhs);
outer.body().add(block);
}

List<JType> argTypes = new ArrayList<>();
for (CodeGeneratorArgument arg : method) {
argTypes.add(model._ref(arg.getType()));
}
JMethod inner = innerClassGenerator.clazz.getMethod(method.getMethodName(), argTypes.toArray(new JType[0]));

if (inner != null) {
// removes empty method from the inner class
if (inner.body().isEmpty()) {
innerClassGenerator.clazz.methods().remove(inner);
continue;
}

JInvocation methodCall = innerClassField.invoke(inner);
for (CodeGeneratorArgument arg : method) {
methodCall.arg(JExpr.direct(arg.getName()));
}
if (isVoidMethod) {
outer.body().add(methodCall);
} else {
outer.body()._return(methodCall);
}
}
}
}

for(ClassGenerator<T> child : innerClasses.values()) {
Expand All @@ -373,10 +528,13 @@ public String getNextVar(String prefix) {
}

public JVar declareClassField(String prefix, JType t) {
return clazz.field(JMod.NONE, t, prefix + index++);
return declareClassField(prefix, t, null);
}

public JVar declareClassField(String prefix, JType t, JExpression init) {
if (innerClassGenerator != null && hasMaxIndexValue()) {
return innerClassGenerator.clazz.field(JMod.NONE, t, prefix + index++, init);
}
return clazz.field(JMod.NONE, t, prefix + index++, init);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
Expand Down Expand Up @@ -45,7 +45,7 @@ public class TestLargeFileCompilation extends BaseTestQuery {

private static final int ITERATION_COUNT = Integer.valueOf(System.getProperty("TestLargeFileCompilation.iteration", "1"));

private static final int NUM_PROJECT_COLUMNS = 2000;
private static final int NUM_PROJECT_COLUMNS = 5000;

private static final int NUM_ORDERBY_COLUMNS = 500;

Expand Down