Skip to content

Excel add new sheet without overwriting the file #157

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jan 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,9 @@ public fun <T> DataFrame<T>.writeExcel(
sheetName: String? = null,
writeHeader: Boolean = true,
workBookType: WorkBookType = WorkBookType.XLSX,
keepFile: Boolean = false,
) {
return writeExcel(File(path), columnsSelector, sheetName, writeHeader, workBookType)
return writeExcel(File(path), columnsSelector, sheetName, writeHeader, workBookType, keepFile)
}

public enum class WorkBookType {
Expand All @@ -320,16 +321,22 @@ public fun <T> DataFrame<T>.writeExcel(
sheetName: String? = null,
writeHeader: Boolean = true,
workBookType: WorkBookType = WorkBookType.XLSX,
keepFile: Boolean = false,
) {
val factory = when (workBookType) {
WorkBookType.XLS -> {
{ HSSFWorkbook() }
}

WorkBookType.XLSX -> {
{ XSSFWorkbook() }
val factory =
if (keepFile){
when (workBookType) {
WorkBookType.XLS -> HSSFWorkbook(file.inputStream())
WorkBookType.XLSX -> XSSFWorkbook(file.inputStream())
}
}
else {
when (workBookType) {
WorkBookType.XLS -> HSSFWorkbook()
WorkBookType.XLSX -> XSSFWorkbook()
}
}
}
return file.outputStream().use {
writeExcel(it, columnsSelector, sheetName, writeHeader, factory)
}
Expand All @@ -340,9 +347,9 @@ public fun <T> DataFrame<T>.writeExcel(
columnsSelector: ColumnsSelector<T, *> = { all() },
sheetName: String? = null,
writeHeader: Boolean = true,
factory: () -> Workbook,
factory: Workbook
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's remove this overload. I think I wanted to create an alias for () -> Workbook so that people could call this function like df.writeExcel(os, ..., WorkbookFactory.XLSX), where WorkbookFactory.XLSX would be a class owned by the dataframe library. But I shelved the idea, so this overload adds no value compared to the one below.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking back at it, should the parameter be renamed to workBook now, since a workbook is what is really being passed after this change? I can change it if so.

) {
val wb: Workbook = factory()
val wb: Workbook = factory
writeExcel(wb, columnsSelector, sheetName, writeHeader)
wb.write(outputStream)
wb.close()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,21 @@ class XlsxTest {
}
}

@Test
fun `write to new sheet when keepFile is true`() {
    // Build a frame whose columns are named column1..column5.
    val columnNames = (1..5).map { index -> "column$index" }
    val frame = dataFrameOf(columnNames).randomDouble(7)
    val workbookFile = Files.createTempFile("generated_wb", ".xlsx").toFile()

    // The first write creates the workbook; the second targets another sheet
    // in the same file and passes keepFile = true.
    frame.writeExcel(workbookFile, sheetName = "TestSheet1")
    frame.writeExcel(workbookFile, sheetName = "TestSheet2", keepFile = true)

    // Both sheets must be readable from the same file afterwards.
    val firstSheet = DataFrame.readExcel(workbookFile, sheetName = "TestSheet1")
    val secondSheet = DataFrame.readExcel(workbookFile, sheetName = "TestSheet2")

    firstSheet.columnNames() shouldBe secondSheet.columnNames()
}

@Test
fun `read xlsx file with duplicated columns and repair column names`() {
shouldThrow<DuplicateColumnNamesException> {
Expand Down
15 changes: 15 additions & 0 deletions docs/StardustDocs/topics/write.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,21 @@ wb.close()

<!---END-->

To add new sheets without using Apache POI directly, pass `keepFile = true`: if the file already exists, it is kept and the new sheet is added to it instead of the file being replaced.

<!---FUN writeXlsWithMultipleSheets-->

```kotlin
// Create a new Excel workbook with a single sheet called "allPersons", replacing the file if it already exists -> Current sheets: allPersons
df.writeExcel(file, sheetName = "allPersons")
// Add a new sheet to the previous file without replacing it, by setting keepFile = true -> Current sheets: allPersons, happyPersons
df.filter { person -> person.isHappy }.remove("isHappy").writeExcel(file, sheetName = "happyPersons", keepFile = true)
// Add a new sheet to the previous file without replacing it, by setting keepFile = true -> Current sheets: allPersons, happyPersons, unhappyPersons
df.filter { person -> !person.isHappy }.remove("isHappy").writeExcel(file, sheetName = "unhappyPersons", keepFile = true)
```

<!---END-->

### Writing to Apache Arrow formats

Add dependency:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,20 @@ class Write : TestBase() {
}
}

/**
 * Documentation sample: writes three sheets ("allPersons", "happyPersons", "unhappyPersons")
 * into the same temporary file, passing `keepFile = true` for the second and third write.
 *
 * The statements between `SampleStart` and `SampleEnd` mirror the snippet shown for this
 * function in `docs/StardustDocs/topics/write.md`, so keep the two in sync when editing.
 */
@Test
fun writeXlsWithMultipleSheets() {
useTempFile { file ->
// SampleStart
// Create a new Excel workbook with a single sheet called "allPersons", replacing the file if it already exists -> Current sheets: allPersons
df.writeExcel(file, sheetName = "allPersons")
// Add a new sheet to the previous file without replacing it, by setting keepFile = true -> Current sheets: allPersons, happyPersons
df.filter { person -> person.isHappy }.remove("isHappy").writeExcel(file, sheetName = "happyPersons", keepFile = true)
// Add a new sheet to the previous file without replacing it, by setting keepFile = true -> Current sheets: allPersons, happyPersons, unhappyPersons
df.filter { person -> !person.isHappy }.remove("isHappy").writeExcel(file, sheetName = "unhappyPersons", keepFile = true)
// SampleEnd
}
}

companion object {
private fun String.rejoinWithSystemLineSeparator() = rejoinWithLineSeparator(System.lineSeparator())

Expand Down