Skip to content

Feature: Determine information about merged cells #323

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,21 @@ public class Cell {
private final String rawValue;
private final String dataFormatId;
private final String dataFormatString;
private final CellAddress mergedCellAddress;

Cell(ReadableWorkbook workbook, CellType type, Object value, CellAddress address, String formula, String rawValue) {
this(workbook, type, value, address, formula, rawValue, null, null);
Cell(ReadableWorkbook workbook, CellType type, Object value, CellAddress address, String formula, String rawValue, CellAddress mergedCellAddress) {
this(workbook, type, value, address, formula, rawValue, mergedCellAddress, null, null);
}

Cell(ReadableWorkbook workbook, CellType type, Object value, CellAddress address, String formula, String rawValue,
String dataFormatId, String dataFormatString) {
CellAddress mergedCellAddress, String dataFormatId, String dataFormatString) {
this.workbook = workbook;
this.type = type;
this.value = value;
this.address = address;
this.formula = formula;
this.rawValue = rawValue;
this.mergedCellAddress = mergedCellAddress;
this.dataFormatId = dataFormatId;
this.dataFormatString = dataFormatString;
}
Expand Down Expand Up @@ -170,4 +172,11 @@ public String toString() {
return sb.append(']').toString();
}

/**
 * Tells whether this cell was read with a merged-cell address attached.
 *
 * @return {@code true} when a merged-cell address was supplied at construction,
 *         {@code false} when it was {@code null}
 */
public boolean isMerged() {
return mergedCellAddress != null;
}

/**
 * Returns the merged-cell address recorded for this cell.
 *
 * @return the address passed to the constructor as {@code mergedCellAddress},
 *         or {@code null} when none was supplied (in which case
 *         {@link #isMerged()} is {@code false})
 */
public CellAddress getMergedCellAddress() {
return mergedCellAddress;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Copyright 2016 Dhatim.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dhatim.fastexcel.reader;

import static org.dhatim.fastexcel.reader.DefaultXMLInputFactory.factory;

import java.io.InputStream;
import java.util.NoSuchElementException;
import java.util.Spliterator;
import java.util.function.Consumer;
import javax.xml.stream.XMLStreamException;

/**
 * Sequential {@link Spliterator} over the {@code <mergeCell>} elements found in
 * an XML input stream, yielding each element's {@code ref} attribute parsed as
 * a {@link CellRangeAddress}.
 *
 * <p>The spliterator cannot be split and does not know its size in advance.
 * It does not close the underlying stream; that is left to the caller.
 */
class MergeCellSpliterator implements Spliterator<CellRangeAddress> {

    private final SimpleXmlReader r;

    /**
     * Set once the reader has reported that no further {@code <mergeCell>}
     * element is available, so later calls to {@link #tryAdvance(Consumer)}
     * answer {@code false} without touching the reader again.
     */
    private boolean exhausted;

    /**
     * Creates a spliterator reading from the given stream.
     *
     * @param inputStream XML content to scan for {@code <mergeCell>} elements
     * @throws XMLStreamException if the XML reader cannot be created
     */
    public MergeCellSpliterator(InputStream inputStream) throws XMLStreamException {
        this.r = new SimpleXmlReader(factory, inputStream);
    }

    /**
     * Hands the next merged range, if any, to {@code action}.
     *
     * @param action consumer receiving the next range
     * @return {@code true} if a range was delivered, {@code false} once there are none left
     * @throws NullPointerException if {@code action} is {@code null}
     * @throws ExcelReaderException if the underlying XML cannot be read
     */
    @Override
    public boolean tryAdvance(Consumer<? super CellRangeAddress> action) {
        // The Spliterator contract requires rejecting a null action even when
        // there is nothing left to deliver.
        if (action == null) {
            throw new NullPointerException("action");
        }
        if (exhausted) {
            return false;
        }
        try {
            if (hasNext()) {
                action.accept(next());
                return true;
            }
            exhausted = true;
            return false;
        } catch (XMLStreamException e) {
            throw new ExcelReaderException(e);
        }
    }

    /**
     * Splitting is not supported.
     *
     * @return always {@code null}
     */
    @Override
    public Spliterator<CellRangeAddress> trySplit() {
        return null;
    }

    /**
     * The number of ranges is not known up front.
     *
     * @return {@link Long#MAX_VALUE}
     */
    @Override
    public long estimateSize() {
        return Long.MAX_VALUE;
    }

    /**
     * @return {@code DISTINCT | IMMUTABLE | NONNULL | ORDERED}
     */
    @Override
    public int characteristics() {
        return DISTINCT | IMMUTABLE | NONNULL | ORDERED;
    }

    /**
     * Asks the reader to go to the next {@code <mergeCell>} start tag or the
     * {@code </mergeCells>} end tag, whichever is found first.
     *
     * @return {@code true} only when the reader stopped on a {@code mergeCell} element
     * @throws XMLStreamException if the XML cannot be read
     */
    private boolean hasNext() throws XMLStreamException {
        if (r.goTo(() -> r.isStartElement("mergeCell") || r.isEndElement("mergeCells"))) {
            return "mergeCell".equals(r.getLocalName());
        }
        return false;
    }

    /**
     * Parses the {@code ref} attribute of the current {@code mergeCell} element.
     *
     * @return the range described by the current element
     * @throws NoSuchElementException if the reader is not on a {@code mergeCell} element
     */
    private CellRangeAddress next() {
        if (!"mergeCell".equals(r.getLocalName())) {
            throw new NoSuchElementException();
        }

        String ref = r.getAttribute("ref");
        return CellRangeAddress.valueOf(ref);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
package org.dhatim.fastexcel.reader;

import java.util.stream.Collectors;
import javax.xml.stream.XMLStreamException;
import java.io.*;
import java.util.ArrayList;
Expand Down Expand Up @@ -145,7 +146,13 @@ private void createSheet(SimpleXmlReader r) {
Stream<Row> openStream(Sheet sheet) throws IOException {
try {
InputStream inputStream = pkg.getSheetContent(sheet);
Stream<Row> stream = StreamSupport.stream(new RowSpliterator(this, inputStream), false);

Stream<CellRangeAddress> mergedCellStream = StreamSupport.stream(new MergeCellSpliterator(inputStream), false);
List<CellRangeAddress> mergedCells = mergedCellStream.onClose(asUncheckedRunnable(inputStream)).collect(Collectors.toList());

inputStream = pkg.getSheetContent(sheet);

Stream<Row> stream = StreamSupport.stream(new RowSpliterator(this, mergedCells, inputStream), false);
return stream.onClose(asUncheckedRunnable(inputStream));
} catch (XMLStreamException e) {
throw new IOException(e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,13 @@ class RowSpliterator implements Spliterator<Row> {

private final HashMap<Integer, BaseFormulaCell> sharedFormula = new HashMap<>();
private final HashMap<CellRangeAddress, String> arrayFormula = new HashMap<>();

private final List<CellRangeAddress> mergedCells;
private int rowCapacity = 16;

public RowSpliterator(ReadableWorkbook workbook, InputStream inputStream) throws XMLStreamException {
public RowSpliterator(ReadableWorkbook workbook, List<CellRangeAddress> mergedCells, InputStream inputStream) throws XMLStreamException {
this.workbook = workbook;
this.mergedCells = mergedCells;
this.r = new SimpleXmlReader(factory, inputStream);

r.goTo("sheetData");
Expand Down Expand Up @@ -105,6 +108,16 @@ private Row next() throws XMLStreamException {
private Cell parseCell() throws XMLStreamException {
String cellRef = r.getAttribute("r");
CellAddress addr = new CellAddress(cellRef);

CellAddress mergedCellAddress = null;

for(CellRangeAddress mergedCell : mergedCells) {
if (mergedCell.isInRange(addr.getRow(), addr.getColumn())) {
mergedCellAddress = new CellAddress(mergedCell.getFirstRow(), mergedCell.getFirstColumn());
break;
}
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am afraid this could introduce a huge performance regression if there are many merged cell ranges. Maybe we could organize them in an ordered data structure to speed up this search? For an example, see #236.


String type = r.getOptionalAttribute("t").orElse("n");
String styleString = r.getAttribute("s");
String formatId = null;
Expand All @@ -118,15 +131,15 @@ private Cell parseCell() throws XMLStreamException {
}

if ("inlineStr".equals(type)) {
return parseInlineStr(addr);
return parseInlineStr(addr, mergedCellAddress);
} else if ("s".equals(type)) {
return parseString(addr);
return parseString(addr, mergedCellAddress);
} else {
return parseOther(addr, type, formatId, formatString);
return parseOther(addr, type, formatId, formatString, mergedCellAddress);
}
}

private Cell parseOther(CellAddress addr, String type, String dataFormatId, String dataFormatString)
private Cell parseOther(CellAddress addr, String type, String dataFormatId, String dataFormatString, CellAddress mergedCellAddress)
throws XMLStreamException {
CellType definedType = parseType(type);
Function<String, ?> parser = getParserForType(definedType);
Expand Down Expand Up @@ -174,10 +187,10 @@ private Cell parseOther(CellAddress addr, String type, String dataFormatId, Stri
}

if (formula == null && value == null && definedType == CellType.NUMBER) {
return new Cell(workbook, CellType.EMPTY, null, addr, null, rawValue);
return new Cell(workbook, CellType.EMPTY, null, addr, null, rawValue, mergedCellAddress);
} else {
CellType cellType = (formula != null) ? CellType.FORMULA : definedType;
return new Cell(workbook, cellType, value, addr, formula, rawValue, dataFormatId, dataFormatString);
return new Cell(workbook, cellType, value, addr, formula, rawValue, mergedCellAddress, dataFormatId, dataFormatString);
}
}

Expand Down Expand Up @@ -263,28 +276,28 @@ private String shiftCell(String cellID, Integer dCol, Integer dRow) {
}


private Cell parseString(CellAddress addr) throws XMLStreamException {
private Cell parseString(CellAddress addr, CellAddress mergedCellAddress) throws XMLStreamException {
r.goTo(() -> r.isStartElement("v") || r.isEndElement("c"));
if (r.isEndElement("c")) {
return empty(addr, CellType.STRING);
return empty(addr, CellType.STRING, mergedCellAddress);
}
String v = r.getValueUntilEndElement("v");
if (v.isEmpty()) {
return empty(addr, CellType.STRING);
return empty(addr, CellType.STRING, mergedCellAddress);
}
int index = Integer.parseInt(v);
String sharedStringValue = workbook.getSharedStringsTable().getItemAt(index);
Object value = sharedStringValue;
String formula = null;
String rawValue = sharedStringValue;
return new Cell(workbook, CellType.STRING, value, addr, formula, rawValue);
return new Cell(workbook, CellType.STRING, value, addr, formula, rawValue, mergedCellAddress);
}

private Cell empty(CellAddress addr, CellType type) {
return new Cell(workbook, type, "", addr, null, "");
private Cell empty(CellAddress addr, CellType type, CellAddress mergedCellAddress) {
return new Cell(workbook, type, "", addr, null, "", mergedCellAddress);
}

private Cell parseInlineStr(CellAddress addr) throws XMLStreamException {
private Cell parseInlineStr(CellAddress addr, CellAddress mergedCellAddress) throws XMLStreamException {
Object value = null;
String formula = null;
String rawValue = null;
Expand All @@ -299,7 +312,7 @@ private Cell parseInlineStr(CellAddress addr) throws XMLStreamException {
}
}
CellType cellType = formula == null ? CellType.STRING : CellType.FORMULA;
return new Cell(workbook, cellType, value, addr, formula, rawValue);
return new Cell(workbook, cellType, value, addr, formula, rawValue, mergedCellAddress);
}

private Optional<String> getArrayFormula(CellAddress addr) {
Expand Down Expand Up @@ -368,5 +381,4 @@ private static void ensureSize(List<?> list, int newSize) {
list.add(null);
}
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Copyright 2016 Dhatim.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dhatim.fastexcel.reader;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.stream.Stream;

/**
 * Reads {@code /xlsx/merge_cells.xlsx} and checks the merged-cell information
 * reported by the cells in rows 1 to 3 of the first sheet.
 */
class MergeCellTest {

    @org.junit.jupiter.api.Test
    void test() throws IOException {
        try (InputStream in = Resources.open("/xlsx/merge_cells.xlsx");
             ReadableWorkbook workbook = new ReadableWorkbook(in)) {
            Sheet firstSheet = workbook.getFirstSheet();

            // Index the rows by their row number so each can be checked directly.
            Map<Integer, Row> rowsByNumber = new HashMap<>();
            try (Stream<Row> rowStream = firstSheet.openStream()) {
                rowStream.forEach(row -> rowsByNumber.put(row.getRowNum(), row));
            }

            Row first = rowsByNumber.get(1);
            assertMergedInto(first, 0, 0, 0);
            assertMergedInto(first, 1, 0, 0);
            assertMergedInto(first, 2, 0, 2);

            Row second = rowsByNumber.get(2);
            assertMergedInto(second, 0, 0, 0);
            assertMergedInto(second, 1, 0, 0);
            assertMergedInto(second, 2, 0, 2);

            Row third = rowsByNumber.get(3);
            assertMergedInto(third, 0, 2, 0);
            assertMergedInto(third, 1, 2, 0);

            // The last cell of the third row is expected to stand alone.
            assertFalse(third.getCell(2).isMerged());
            assertNull(third.getCell(2).getMergedCellAddress());
        }
    }

    /**
     * Asserts that the cell at {@code column} of {@code row} is merged and that
     * its merged-cell address has the expected row and column.
     */
    private static void assertMergedInto(Row row, int column, int expectedRow, int expectedColumn) {
        Cell cell = row.getCell(column);
        assertTrue(cell.isMerged());
        CellAddress origin = cell.getMergedCellAddress();
        assertEquals(expectedRow, origin.getRow());
        assertEquals(expectedColumn, origin.getColumn());
    }
}