Spire.XLS is a professional Excel API that enables developers to create, manage, manipulate, convert and print Excel worksheets. Get free and professional technical support for Spire.XLS for .NET, Java, Android, C++, Python.

Sun May 09, 2021 3:18 am

Hi,

Say I have a worksheet with 6 columns and there are some rows in the sheet whose values from column 1 to column 5 are same just the numeric value in column 6 are different.

I want to find those near identical rows (if any) and then replace them with only one row where the values from column 1 to column 5 are same as before just the value in column 6 is the sum of all the column 6 values of those near identical rows.
Ex.
From
    011 ----- JOHN HAMMOND ---- 15-02-1983 ---- USA ---- 3345669 ---- 4560.00
    011 ----- JOHN HAMMOND ---- 15-02-1983 ---- USA ---- 3345669 ---- 60.00
    011 ----- JOHN HAMMOND ---- 15-02-1983 ---- USA ---- 3345669 ---- 150.00

To
    011 ----- JOHN HAMMOND ----- 15-02-1983 ----- USA ----- 3345669 ----- 4770.00

How can I do this?

DB_007
 
Posts: 36
Joined: Sat Sep 08, 2018 4:45 pm

Mon May 10, 2021 10:30 am

Hello,

Thank you for your inquiry. Please refer to the following code to achieve your need. If this doesn't help, please provide your Excel file for further investigation.
Code: Select all
            Workbook workbook = new Workbook();
            workbook.LoadFromFile(@"D:\360MoveData\Users\Administrator\Desktop\value.xlsx");
            Worksheet sheet = workbook.Worksheets[0];

            int colCount = sheet.Rows[0].Count;
            double sum;
            for (int i = 0; i < sheet.Columns[0].Count - 1; i++)
            {
                //The initial value is the value of the last column of the i-th row
                sum = sheet.Columns[colCount - 1].Rows[i].NumberValue;
                for (int j = i + 1; j < sheet.Columns[0].Count; j++)
                {
                    //Compare the value of the first column in the table
                    // If the value of the first column of the i-th row is the same as the value of the first column of the j-th row
                    if (sheet.Columns[0].CellList[i].Value == sheet.Columns[0].CellList[j].Value)
                    {
                        //Compare the values of row i and row j in the table
                        for (int col = 1; col < sheet.Rows[0].Count ; col++)
                        {
                            //Compare the values of the i-th row and the j-th row in the table until the value of the penultimate cell 
                            //is also the same. Add the value of the j-th row to the initial value, and then delete the j-th row
                            if (col == sheet.Rows[0].Count - 2 && sheet.Rows[i].CellList[col].Value == sheet.Rows[j].CellList[col].Value)
                            {
                                sum += sheet.Rows[j].Columns[colCount - 1].NumberValue;
                                sheet.DeleteRow(j+1);
                                j--;
                            }
                            else if (col != sheet.Rows[0].Count - 2 && sheet.Rows[i].CellList[col].Value == sheet.Rows[j].CellList[col].Value)
                            {
                                continue;
                            }
                            else
                            {
                                break;
                            }
                        }
                    }
                }
                //The value in the last column of the i-th row is the sum of the last column values of all the same groups
                sheet.Columns[colCount - 1].Rows[i].NumberValue = sum;
            }
            workbook.SaveToFile(@"D:\360MoveData\Users\Administrator\Desktop\output.xlsx", FileFormat.Version2016);

Sincerely,
Annika
E-iceblue support team
User avatar

Annika.Zhou
 
Posts: 1643
Joined: Wed Apr 07, 2021 2:50 am

Tue May 11, 2021 4:37 am

Thanks for your reply.

Sorry, I should have added my input and desired output file in my first post. I've attached it in this post for your better understanding.

The near identical rows should be identified when the data from column A to F are same and then all of those identical rows should be replaced by one row with column G having the sum. Data on column H should be ignored.

Thanks in advance.
Attachments
Desktop.rar
(17.49 KiB) Downloaded 204 times

DB_007
 
Posts: 36
Joined: Sat Sep 08, 2018 4:45 pm

Tue May 11, 2021 9:34 am

Hello,

Thanks for your sharing. Please try the following code to achieve your need. If there is any question, please feel free to write back.
Code: Select all
 
            Workbook workbook = new Workbook();
            workbook.LoadFromFile("input.xlsx");
            Worksheet sheet = workbook.Worksheets[0];
            int colCount = sheet.Rows[0].Count;
            float sum;
            for (int i = 1; i < sheet.Columns[0].Count - 1; i++)
            {
                //Delete blank row
                if (sheet.Rows[i].IsBlank )
                {
                    sheet.DeleteRow(i + 1);
                    i--;
                }
                sum = (float)sheet.Columns[colCount - 2].Rows[i].NumberValue;
                for (int j = i + 1; j < sheet.Columns[0].Count; j++)
                {
                   
                    if (sheet.Columns[0].CellList[i].Value == sheet.Columns[0].CellList[j].Value)
                    {
                       
                        for (int col = 1; col < sheet.Rows[0].Count - 1; col++)
                        {
                           
                            if (col == sheet.Rows[0].Count - 3 && sheet.Rows[i].CellList[col].Value == sheet.Rows[j].CellList[col].Value)
                            {
                                    sum += (float)sheet.Columns[colCount - 2].Rows[j].NumberValue;
                                    sheet.DeleteRow(j + 1);
                                    j--;

         
                            }
                            else if (col != sheet.Rows[0].Count - 3 && sheet.Rows[i].CellList[col].Value == sheet.Rows[j].CellList[col].Value)
                            {
                                continue;
                            }
                            else
                            {
                                break;
                            }
                        }
                    }
                }
               
                sheet.Columns[colCount - 2].Rows[i].NumberValue = sum;
            }
            workbook.SaveToFile("output.xlsx", FileFormat.Version2016);

Sincerely,
Annika
E-iceblue support team
User avatar

Annika.Zhou
 
Posts: 1643
Joined: Wed Apr 07, 2021 2:50 am

Wed May 12, 2021 4:35 am

Hi,

Thanks for your code.

So far so good.

On a different subject which I initially thought it would be easy to get it done i.e. check if any date exists in a specific column using your article https://www.e-iceblue.com/Tutorials/Spi ... -in-C.html.

When I tried it on column F of my input file just to check, it returned the cellType.ToString(); as Number and not as Date...why is that?

DB_007
 
Posts: 36
Joined: Sat Sep 08, 2018 4:45 pm

Wed May 12, 2021 8:53 am

Hello,

Thanks for your inquiry.
The TRangeValueType only provides 6 enumeration types (string/number/boolean/formula/error/blank), which does not include the date type. Thus,"Type.String();" returns "Number" instead of "Date". However, you can use the following method to determine whether it is a date type and customize its return type. If there is any other issue, just feel free to write back.

Sincerely,
Sofia
E-iceblue support team
Attachments
setting.png
setting.png (8.7 KiB) Viewed 1704 times
User avatar

Sofia.Yang
 
Posts: 84
Joined: Tue Jul 14, 2020 1:41 am

Thu May 13, 2021 9:07 am

Thank you

DB_007
 
Posts: 36
Joined: Sat Sep 08, 2018 4:45 pm

Fri May 14, 2021 1:23 am

Hello,

You're welcome.
If you have other questions about using Spire.XLS in the future, please feel free to contact us.

Sincerely,
Annika
E-iceblue support team
User avatar

Annika.Zhou
 
Posts: 1643
Joined: Wed Apr 07, 2021 2:50 am

Fri May 14, 2021 7:15 am

Hi,

Regarding "ADD COLUMN VALUES OF NEAR IDENTICAL ROWS", the code you provided is quite slow even for relatively small excel file.

Can it be made more faster/efficient?

Thanks

DB_007
 
Posts: 36
Joined: Sat Sep 08, 2018 4:45 pm

Fri May 14, 2021 12:57 pm

Hello,

The attached code is more efficient than the previous code. Please have a try. If there is any question, please feel free to write back.

Sincerely,
Annika
E-iceblue support team
Attachments
demo.zip
(1.2 KiB) Downloaded 195 times
User avatar

Annika.Zhou
 
Posts: 1643
Joined: Wed Apr 07, 2021 2:50 am

Fri May 14, 2021 4:11 pm

Hi,

Thank you again for your reply !!

I've run both your old and new code on the input file that I provided earlier in this post. I usually use Ctrl+Down key to go to the last row of a sheet.

Interestingly when I used the Ctrl+Down key on the output file which was created from your new code it took me to a blank row which was unusual, so I tried to get the last row and last column using spire.xls coding just to check and I found out that the output file which was created from your old code was showing Last row : 24 & Last column : 8 and the output file which was created from your new code was showing Last row : 102 & Last column : 8.

So I'm a bit confused as to why is this happening and what is the solution to this as this could lead to a serious flaw in my code..

Thanks in advance.

DB_007
 
Posts: 36
Joined: Sat Sep 08, 2018 4:45 pm

Mon May 17, 2021 9:14 am

Hello,

Thanks for your feedback.
There are actually 102 lines in the excel file you provided. In the old code, I deleted the blank rows and duplicate rows, so the excel sheet has only 24 rows. The new code does not delete blank rows and the data in duplicate rows is replaced with empty strings, so the excel sheet still has 102 rows.
You could refer to the following code to remove blank lines in the new code:
Code: Select all
            System.Diagnostics.Stopwatch stopwatch = new System.Diagnostics.Stopwatch();
            ……
            int x = 1;
            foreach (string key in dictionary.Keys)
            {
                //Console.WriteLine("x="+x);
                Row row = dictionary[key];
                double value = dictionary2[key];

                for (int k = 0; k < 6; k++)
                {
                    sheet.Rows[x].Columns[k].Value = row.list[k];
                }
                sheet.Rows[x].Columns[6].NumberValue = value;
                sheet.Rows[x].Columns[7].Value = row.list[6];
                x++;
            }

            //Delete blank rows from the worksheet.
            for (int s= dictionary.Keys.Count + 2; s <= sheet.Columns[0].Count;)
            {
                sheet.DeleteRow(s);             
            }
            wb.SaveToFile("output.xlsx", FileFormat.Version2016);
            ……

Sincerely,
Annika
E-iceblue support team
User avatar

Annika.Zhou
 
Posts: 1643
Joined: Wed Apr 07, 2021 2:50 am

Fri May 21, 2021 7:20 am

Hello,

Greetings from e-iceblue.
How is your issue going? Can you give us some feedback at your convenience? Thanks in advance.

Sincerely,
Annika
E-iceblue support team
User avatar

Annika.Zhou
 
Posts: 1643
Joined: Wed Apr 07, 2021 2:50 am

Fri May 21, 2021 12:36 pm

Hi,

The code is getting very long for a relatively simple task. Is there a better / shorter more efficient way? maybe like using LINQ to shorten and make the code faster and less complicated !

Regards,

DB

DB_007
 
Posts: 36
Joined: Sat Sep 08, 2018 4:45 pm

Mon May 24, 2021 8:02 am

Hi,

The following code is shorter than the previous code. Please have a try. If there is any question, please feel free to write back.
Code: Select all
            Workbook workbook = new Workbook();           
            workbook.LoadFromFile(filePath);
            Worksheet sheet = workbook.Worksheets[0];
            int columnIndex = sheet.LastColumn - 1;

            //Delete blank row
            for (int i = 1; i < sheet.Columns[0].Count - 1; i++)
            {               
                if (sheet.Rows[i].IsBlank)
                {
                    sheet.DeleteRow(i + 1);
                    i--;
                }
            }

            var lookupRanges = sheet.Range[2, 1, sheet.LastRow, sheet.LastColumn - 2];

            // get the duplicated row numbers 
            var duplicatedRows = lookupRanges.Rows
                   .GroupBy(x => x.Columns[0].DisplayedText)
                   .Where(x => x.Count() > 1)
                   .SelectMany(x => x.Skip(1))
                   .Select(x => x.Columns[0].Row)
                   .ToList();

            var lookupRanges2 = sheet.Range[2, 1, sheet.LastRow, columnIndex];

            if (duplicatedRows.Any())
            {
                var firstRows = lookupRanges2.Rows
                        .GroupBy(x => x.Columns[0].DisplayedText)
                        .Where(x => x.Count() > 1)
                        .Select(item => item.First()).ToList();
                var sumValue = lookupRanges2.Rows
                        .GroupBy(x => x.Columns[0].DisplayedText)
                        .Where(x => x.Count() > 1)
                        .Select(item => item.Sum(c => c.Columns[columnIndex - 1].NumberValue)).ToList();
                for (int i = 0; i < firstRows.Count; i++)
                {
                    firstRows[i].Columns[columnIndex - 1].NumberValue = sumValue[i];
                }
            }

            for (int i = 0; i < duplicatedRows.Count; i++)
            {
                sheet.DeleteRow(duplicatedRows[i] - i);
            }

            workbook.SaveToFile(outputPath);

Sincerely,
Annika
E-iceblue support team
User avatar

Annika.Zhou
 
Posts: 1643
Joined: Wed Apr 07, 2021 2:50 am

Return to Spire.XLS