apache · dsmiley · Apr 3, 2025 · Apr 8, 2025 · Apr 11, 2025 · rmuir
diff --git a/lucene/core/src/java/org/apache/lucene/search/LongValues.java b/lucene/core/src/java/org/apache/lucene/search/LongValues.java
@@ -31,4 +31,20 @@ public abstract class LongValues {
    * @return true if there is a value for this document
    */
   public abstract boolean advanceExact(int doc) throws IOException;
+
+  /**
+   * An empty LongValues: {@link #advanceExact(int)} returns {@code false} for every document.
+   */
+  public static final LongValues EMPTY =
+      new LongValues() {
+        @Override
+        public boolean advanceExact(int doc) throws IOException {
+          return false;
+        }
+
+        @Override
+        public long longValue() throws IOException {
+          throw new UnsupportedOperationException(); // advanceExact never positions on a value
+        }
+      };
 }
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java
@@ -132,7 +132,8 @@ protected Similarity(boolean discountOverlaps) {
    *
    * <p><b>WARNING</b>: The default implementation is used by Lucene's supplied Similarity classes,
    * which means you can change the Similarity at runtime without reindexing. If you override this
-   * method, you'll need to re-index documents for it to take effect.
+   * method, you'll need to re-index documents for it to take effect. Also be sure to override
+   * {@link #decodeNorm(long)}.
    *
    * <p>Matches in longer fields are less precise, so implementations of this method usually set
    * smaller values when <code>state.getLength()</code> is large, and larger values when <code>
@@ -161,6 +162,18 @@ public long computeNorm(FieldInvertState state) {
     return SmallFloat.intToByte4(numTerms);
   }
 
+  /**
+   * Decodes a norm from {@link #computeNorm(FieldInvertState)}; its meaning varies by Similarity.
+   *
+   * @lucene.experimental
+   * @param norm from {@link org.apache.lucene.index.NumericDocValues#longValue()} of the norm.
+   * @return the decoded norm; by default the field length measured in positions, approximated.
+   */
+  public long decodeNorm(long norm) {
+    final byte encoded = (byte) norm; // the default computeNorm emits a SmallFloat byte4 value
+    return SmallFloat.byte4ToInt(encoded);
+  }
+
   /**
    * Compute any collection-level weight (e.g. IDF, average document length, etc) needed for scoring
    * a query.

diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/IndexReaderFunctions.java b/lucene/queries/src/java/org/apache/lucene/queries/function/IndexReaderFunctions.java
@@ -19,8 +19,10 @@
 
 import java.io.IOException;
 import java.util.Objects;
+import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
@@ -30,10 +32,11 @@
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.LongValues;
 import org.apache.lucene.search.LongValuesSource;
+import org.apache.lucene.search.similarities.Similarity;
 
 /**
- * Class exposing static helper methods for generating DoubleValuesSource instances over some
- * IndexReader statistics
+ * Static helper methods for generating {@link DoubleValuesSource} and {@link LongValuesSource}
+ * instances over some IndexReader statistics.
  */
 public final class IndexReaderFunctions {
 
@@ -301,6 +304,18 @@ public static DoubleValuesSource docCount(String field) {
     return new IndexReaderDoubleValuesSource(r -> r.getDocCount(field), "docCount(" + field + ")");
   }
 
+  /**
+   * Creates a value source that returns each document's norm for this field, as decoded by the
+   * searcher's {@link Similarity}. By default that is the field's position length, approximated.
+   *
+   * @see Similarity#computeNorm(FieldInvertState)
+   * @see Similarity#decodeNorm(long)
+   * @see org.apache.lucene.index.LeafReader#getNormValues(String)
+   */
+  public static LongValuesSource norm(String field) {
+    return new NormValuesSource(field);
+  }
+
   @FunctionalInterface
   private interface ReaderFunction {
     double apply(IndexReader reader) throws IOException;
@@ -413,4 +428,65 @@ public boolean isCacheable(LeafReaderContext ctx) {
       return false;
     }
   }
+
+  /** Decodes each document's norm of a field with the Similarity bound by {@code rewrite}. */
+  private static class NormValuesSource extends LongValuesSource {
+    private final String field;
+    private Similarity similarity; // null until bound; set once, on the copy rewrite() returns
+
+    private NormValuesSource(String field) {
+      this.field = Objects.requireNonNull(field);
+    }
+
+    @Override
+    public LongValuesSource rewrite(IndexSearcher searcher) throws IOException {
+      // Bind a copy; mutating this shared instance would leak one searcher's Similarity to others.
+      final NormValuesSource bound = new NormValuesSource(field);
+      bound.similarity = searcher.getSimilarity(); // isn't field-specific
+      return bound;
+    }
+
+    @Override
+    public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
+      final NumericDocValues norms = ctx.reader().getNormValues(field);
+      if (norms == null) return LongValues.EMPTY; // this leaf has no norms for the field
+      final Similarity sim = Objects.requireNonNull(similarity, "rewrite() was not called");
+      return new LongValues() {
+        @Override
+        public long longValue() throws IOException {
+          return sim.decodeNorm(norms.longValue());
+        }
+
+        @Override
+        public boolean advanceExact(int doc) throws IOException {
+          return norms.advanceExact(doc);
+        }
+      };
+    }
+
+    @Override
+    public boolean needsScores() {
+      return false;
+    }
+
+    @Override
+    public boolean isCacheable(LeafReaderContext ctx) {
+      return true;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      return o instanceof NormValuesSource that && field.equals(that.field);
+    }
+
+    @Override
+    public int hashCode() {
+      return Objects.hash(getClass(), field);
+    }
+
+    @Override
+    public String toString() {
+      return "norm(" + field + ")";
+    }
+  }
 }
diff --git a/lucene/queries/src/test/org/apache/lucene/queries/function/TestIndexReaderFunctions.java b/lucene/queries/src/test/org/apache/lucene/queries/function/TestIndexReaderFunctions.java
@@ -187,6 +187,13 @@ public void testDocCount() throws Exception {
     assertCacheable(vs, false);
   }
 
+  public void testNorm() throws Exception {
+    final LongValuesSource normSource = IndexReaderFunctions.norm("text");
+    assertHits(normSource.toDoubleValuesSource(), new float[] {6, 2}); // decoded norm values
+    assertEquals("norm(text)", normSource.toString());
+    assertCacheable(normSource, true); // NormValuesSource#isCacheable is unconditionally true
+  }
+
   void assertCacheable(DoubleValuesSource vs, boolean expected) throws Exception {
     Query q = new FunctionScoreQuery(new MatchAllDocsQuery(), vs);
     Weight w = searcher.createWeight(q, ScoreMode.COMPLETE, 1);