Remove Duplicate Rows in Excel in C#, VB.NET

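This article demonstrates how to remove duplicate rows from an Excel worksheet in C# and VB.NET using Spire.XLS for .NET. In the samples below, two rows are treated as duplicates when their cells in column A display the same text; the first occurrence is kept and the later ones are deleted.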

Below is the screenshot of the input Excel file:

Remove Duplicate Rows in Excel in C#, VB.NET

C#
using Spire.Xls;
using System.Linq;

namespace RemoveDuplicateRows
{
    /// <summary>
    /// Sample program that deletes rows whose column A text repeats an earlier row.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "TEST.xlsx", removes from the first worksheet every row whose
        /// column A displayed text already appeared in an earlier row, and saves
        /// the result to "Output.xlsx".
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            //Create a Workbook instance
            Workbook workbook = new Workbook();
            //Load the Excel file
            workbook.LoadFromFile("TEST.xlsx");

            //Get the first worksheet
            Worksheet sheet = workbook.Worksheets[0];

            //Specify the range that you want to remove duplicate records from.
            var range = sheet.Range["A1:A" + sheet.LastRow];

            //Get the duplicated row numbers: group the rows by the text shown in
            //column A, keep the first row of every group and collect the rest.
            //GroupBy yields groups in first-occurrence order, so interleaved
            //duplicates (e.g. A, B, A, B) come out unsorted. Sort the row numbers
            //from bottom to top so that the deletions below cannot shift a row
            //that is still waiting to be deleted.
            var duplicatedRows = range.Rows
                   .GroupBy(x => x.Columns[0].DisplayedText)
                   .Where(x => x.Count() > 1)
                   .SelectMany(x => x.Skip(1))
                   .Select(x => x.Columns[0].Row)
                   .OrderByDescending(rowNumber => rowNumber)
                   .ToList();

            //Remove the duplicate rows, working upwards from the last one.
            //Rows whose column A cell shows the same (presumably empty) text for
            //blanks fall into one group too, so repeated blank rows are removed
            //as well, leaving only the first of them.
            foreach (var rowNumber in duplicatedRows)
            {
                sheet.DeleteRow(rowNumber);
            }

            //Save the result file
            workbook.SaveToFile("Output.xlsx", ExcelVersion.Version2013);
        }
    }
}
VB.NET
Imports Spire.Xls
Imports System.Linq

Namespace RemoveDuplicateRows
    ''' <summary>
    ''' Sample program that deletes rows whose column A text repeats an earlier row.
    ''' </summary>
    Class Program
        ''' <summary>
        ''' Loads "TEST.xlsx", removes from the first worksheet every row whose
        ''' column A displayed text already appeared in an earlier row, and saves
        ''' the result to "Output.xlsx".
        ''' </summary>
        ''' <param name="args">Command-line arguments (not used).</param>
        Private Shared Sub Main(ByVal args As String())
            'Create a Workbook instance and load the Excel file
            Dim workbook As Workbook = New Workbook()
            workbook.LoadFromFile("TEST.xlsx")

            'Get the first worksheet
            Dim sheet As Worksheet = workbook.Worksheets(0)

            'Specify the range that you want to remove duplicate records from
            Dim range = sheet.Range("A1:A" & sheet.LastRow)

            'Get the duplicated row numbers: group the rows by the text shown in
            'column A, keep the first row of every group and collect the rest.
            'GroupBy yields groups in first-occurrence order, so interleaved
            'duplicates (e.g. A, B, A, B) come out unsorted. Sort the row numbers
            'from bottom to top so that the deletions below cannot shift a row
            'that is still waiting to be deleted.
            Dim duplicatedRows = range.Rows _
                .GroupBy(Function(x) x.Columns(0).DisplayedText) _
                .Where(Function(x) x.Count() > 1) _
                .SelectMany(Function(x) x.Skip(1)) _
                .[Select](Function(x) x.Columns(0).Row) _
                .OrderByDescending(Function(rowNumber) rowNumber) _
                .ToList()

            'Remove the duplicate rows, working upwards from the last one
            For Each rowNumber In duplicatedRows
                sheet.DeleteRow(rowNumber)
            Next

            'Save the result file
            workbook.SaveToFile("Output.xlsx", ExcelVersion.Version2013)
        End Sub
    End Class
End Namespace

Output:

Remove Duplicate Rows in Excel in C#, VB.NET