Skip to content

Commit 5142a7e

Browse files
committed
feat: enhance CSV utility with new aggregation and output functions
1 parent c982e95 commit 5142a7e

File tree

1 file changed

+235
-36
lines changed

1 file changed

+235
-36
lines changed

golang/csvUtilsGo.go

Lines changed: 235 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -158,19 +158,22 @@ func (c *CSV_Utils_Go) _validate(column string) error {
158158
}
159159

160160
// get_column_index returns the index of the specified column
161-
func (c *CSV_Utils_Go) get_column_index(column string) (int, error) {
161+
func (c *CSV_Utils_Go) get_column_index(column string, from_func_name string) int {
162162
err := c._validate(column)
163163
if err != nil {
164164
// requested column not found in the csv file, panic !!
165-
// panic(err)
166-
return -1, err
165+
panic(
166+
fmt.Sprintf(
167+
"Error: '%s', Check if you have specified the right column name in function: `'%s'`",
168+
err,
169+
from_func_name))
167170
}
168171
for i, h := range c.headers {
169172
if h == column {
170-
return i, nil
173+
return i
171174
}
172175
}
173-
return -1, errors.New("unexpected error in get_column_index")
176+
return -1 // , errors.New("unexpected error in get_column_index")
174177
}
175178

176179
// display_csv prints the first num_rows of the CSV data.
@@ -445,14 +448,9 @@ func (c *CSV_Utils_Go) remove_duplicates(column string, output_file_name string)
445448
remove all duplicate values from the given `column`
446449
*/
447450

448-
fmt.Println("REMOVING DUPS !! DEBUG !")
451+
// fmt.Println("REMOVING DUPS !! DEBUG !")
449452

450-
col_idx, err := c.get_column_index(column)
451-
452-
if err != nil {
453-
fmt.Println(err)
454-
panic(err)
455-
}
453+
col_idx := c.get_column_index(column, "remove_duplicates")
456454

457455
seen := make(map[string]bool)
458456
unique_rows := make([][]string, 0)
@@ -490,13 +488,7 @@ func (c *CSV_Utils_Go) replace_first_val(
490488
:output_file_name: The new file name in which updated data must be written
491489
*/
492490

493-
col_idx, err := c.get_column_index(column)
494-
495-
if err != nil {
496-
fmt.Println(err)
497-
panic(err)
498-
// return err
499-
}
491+
col_idx := c.get_column_index(column, "replace_first_val")
500492

501493
for i := range c.rows {
502494
if strings.EqualFold(c.rows[i][col_idx], old_val) {
@@ -519,11 +511,7 @@ func (c *CSV_Utils_Go) replace_all_vals(
519511

520512
/*SAME AS ABOVE FUNCTION*/
521513

522-
col_idx, err := c.get_column_index(column)
523-
if err != nil {
524-
fmt.Println(err)
525-
panic(err)
526-
}
514+
col_idx := c.get_column_index(column, "replace_all_vals")
527515

528516
for i := range c.rows {
529517
if strings.EqualFold(c.rows[i][col_idx], old_val) {
@@ -580,12 +568,7 @@ func (c *CSV_Utils_Go) filter_rows(
580568
:param: `output_file_name`: the current CSV file is updated if not given else a new file is created
581569
*/
582570

583-
col_idx, err := c.get_column_index(column)
584-
if err != nil {
585-
// cannot continue if column not found
586-
panic(err)
587-
// return nil, err
588-
}
571+
col_idx := c.get_column_index(column, "filter_rows")
589572

590573
filtered_rows := [][]string{c.headers}
591574

@@ -610,12 +593,7 @@ func (c *CSV_Utils_Go) sort_csv(
610593
:param: `output_file_name`: If given some name, the sorted CSV data would be written in that file
611594
*/
612595

613-
col_idx, err := c.get_column_index(column)
614-
if err != nil {
615-
// cannot continue if column not found
616-
panic(err)
617-
// return nil, err
618-
}
596+
col_idx := c.get_column_index(column, "sort_csv")
619597

620598
// make a copy of rows for sorting to avoid modifying original order if needed
621599
sorted_rows := make([][]string, len(c.rows))
@@ -634,3 +612,224 @@ func (c *CSV_Utils_Go) sort_csv(
634612
combined = append(combined, sorted_rows...)
635613
return combined, nil
636614
}
615+
616+
617+
// # aggregate_column performs aggregation on a numeric column using an operation.
618+
func (c *CSV_Utils_Go) aggregate_column(column string, operation string) (float64, error) {
619+
620+
/*
621+
Performs operations like: 'sum', 'min', 'max', 'std'
622+
623+
:param: `column`: column name on which aggregation will be performed
624+
:param: `operation`: "sum" | "min" | "max' | "std"(standard deviation)
625+
:return: aggregated float value
626+
*/
627+
628+
col_idx := c.get_column_index(column, "aggregate_column")
629+
630+
values := []float64{}
631+
632+
for _, row := range c.rows {
633+
634+
// Check if the value is numeric by attempting conversion
635+
valStr := row[col_idx]
636+
637+
// Remove one dot if present to mimic Python's replace('.', '', 1) for isdigit check
638+
valStrForCheck := strings.Replace(valStr, ".", "", 1)
639+
640+
if _, err := strconv.ParseFloat(valStrForCheck, 64); err == nil {
641+
val, err := strconv.ParseFloat(valStr, 64)
642+
if err == nil {
643+
values = append(values, val)
644+
}
645+
}
646+
}
647+
648+
if len(values) == 0 {
649+
return 0, fmt.Errorf("Column name: `'%s'` has no numeric values", column)
650+
}
651+
652+
switch operation {
653+
case "sum":
654+
sum := 0.0
655+
for _, v := range values {
656+
sum += v
657+
}
658+
return sum, nil
659+
case "avg":
660+
sum := 0.0
661+
for _, v := range values {
662+
sum += v
663+
}
664+
return sum / float64(len(values)), nil
665+
case "min":
666+
min := values[0]
667+
for _, v := range values {
668+
if v < min {
669+
min = v
670+
}
671+
}
672+
return min, nil
673+
case "max":
674+
max := values[0]
675+
for _, v := range values {
676+
if v > max {
677+
max = v
678+
}
679+
}
680+
return max, nil
681+
case "std":
682+
std_dev := standardDeviation(values)
683+
return std_dev, nil
684+
default:
685+
return 0, errors.New("invalid operation. Choose from 'sum', 'avg', 'min', 'max'")
686+
}
687+
}
688+
689+
690+
// output_processed_csv writes the current CSV data to the specified output path.
691+
func (c *CSV_Utils_Go) output_processed_csv(output_path string) {
692+
693+
if output_path == "" {
694+
output_path = c.file_path // Overwrite original file
695+
}
696+
697+
file, err := os.Create(output_path)
698+
if err != nil {
699+
fmt.Printf("Error writing file: %v\n", err)
700+
return
701+
}
702+
703+
defer file.Close()
704+
705+
writer := csv.NewWriter(file)
706+
defer writer.Flush()
707+
708+
_ = writer.Write(c.headers)
709+
err = writer.WriteAll(c.rows)
710+
711+
if err != nil {
712+
fmt.Printf("Error writing file: %v\n", err)
713+
return
714+
}
715+
716+
fmt.Printf("Data saved to %s\n", output_path)
717+
}
718+
719+
720+
// apply_func applies a provided function to all values in the specified column.
721+
func (c *CSV_Utils_Go) apply_func(
722+
column string,
723+
funcToApply interface{},
724+
output_file_name string) ([][]string, error) {
725+
726+
/*
727+
Apply a given function to all values in a specified column.
728+
729+
:param column: Column name to apply function on.
730+
:param func: Function to apply.
731+
:param output_file_name: Optional filename to save modified data.
732+
:return: Modified CSV data as a list of lists.
733+
*/
734+
735+
col_idx := c.get_column_index(column, "apply_func")
736+
737+
modified_data := deepCopy2D(c.rows)
738+
739+
for _, row := range modified_data {
740+
741+
// Using recover to catch any panic during func execution
742+
func() {
743+
defer func() {
744+
if r := recover(); r != nil {
745+
fmt.Printf("Error processing row %v: %v\n", row, r)
746+
}
747+
}()
748+
749+
// row[col_idx] = funcToApply(row[col_idx])
750+
751+
switch f := funcToApply.(type) {
752+
753+
case func(string) string:
754+
row[col_idx] = f(row[col_idx])
755+
756+
case func(int) int:
757+
num, err := strconv.Atoi(row[col_idx])
758+
if err == nil {
759+
row[col_idx] = strconv.Itoa(f(num))
760+
}
761+
762+
case func(float64) float64:
763+
num, err := strconv.ParseFloat(row[col_idx], 64)
764+
if err == nil {
765+
row[col_idx] = fmt.Sprintf("%f", f(num))
766+
} else {
767+
fmt.Printf("Skipping non-float value: %v\n", row[col_idx])
768+
}
769+
770+
case func(float32) float32:
771+
num, err := strconv.ParseFloat(row[col_idx], 64)
772+
if err == nil {
773+
row[col_idx] = fmt.Sprintf("%f", f(float32(num)))
774+
} else {
775+
fmt.Printf("Skipping non-float value: %v\n", row[col_idx])
776+
}
777+
778+
default:
779+
fmt.Printf("Unsupported function type for row %v\n", row)
780+
}
781+
782+
}()
783+
}
784+
785+
combined := append([][]string{c.headers}, modified_data...)
786+
c._update_csv(output_file_name, combined, "")
787+
788+
return combined, nil
789+
}
790+
791+
792+
// export_json converts the CSV data to JSON format and optionally writes it to a file.
793+
func (c *CSV_Utils_Go) export_json(json_file string) (string, error) {
794+
795+
/*
796+
Convert CSV data to JSON format and optionally save it to a file.
797+
798+
:param json_file: (Optional) Filename to save JSON data.
799+
:return: The JSON data as a string.
800+
*/
801+
802+
data := make([]map[string]string, 0)
803+
804+
for _, row := range c.rows {
805+
806+
entry := make(map[string]string)
807+
808+
for i, header := range c.headers {
809+
if i < len(row) {
810+
entry[header] = row[i]
811+
} else {
812+
entry[header] = ""
813+
}
814+
}
815+
816+
data = append(data, entry)
817+
}
818+
819+
jsonBytes, err := json.MarshalIndent(data, "", " ")
820+
821+
if err != nil {
822+
return "", err
823+
}
824+
825+
json_data := string(jsonBytes)
826+
827+
if json_file != "" {
828+
err := os.WriteFile(json_file, []byte(json_data), 0644)
829+
if err != nil {
830+
return "", err
831+
}
832+
}
833+
834+
return json_data, nil
835+
}

0 commit comments

Comments
 (0)