This article demonstrates how to remove the duplicate rows in an Excel file in C# and VB.NET using Spire.XLS for .NET.
Below is the screenshot of the input Excel file:
C#
using Spire.Xls; using System.Linq; namespace RemoveDuplicateRows { class Program { static void Main(string[] args) { //Create a Workbook instance Workbook workbook = new Workbook(); //Load the Excel file workbook.LoadFromFile("TEST.xlsx"); //Get the first worksheet Worksheet sheet = workbook.Worksheets[0]; //Specify the range that you want to remove duplicate records from. var range = sheet.Range["A1:A" + sheet.LastRow]; //Get the duplicated row numbers var duplicatedRows = range.Rows .GroupBy(x => x.Columns[0].DisplayedText) .Where(x => x.Count() > 1) .SelectMany(x => x.Skip(1)) .Select(x => x.Columns[0].Row) .ToList(); //Remove the duplicate rows & blank rows if any for (int i = 0; i < duplicatedRows.Count; i++) { sheet.DeleteRow(duplicatedRows[i] - i); } //Save the result file workbook.SaveToFile("Output.xlsx", ExcelVersion.Version2013); } } }
VB.NET
Imports Spire.Xls Imports System.Linq Namespace RemoveDuplicateRows Class Program Private Shared Sub Main(ByVal args As String()) Dim workbook As Workbook = New Workbook() workbook.LoadFromFile("TEST.xlsx") Dim sheet As Worksheet = workbook.Worksheets(0) Dim range = sheet.Range("A1:A" & sheet.LastRow) Dim duplicatedRows = range.Rows.GroupBy(Function(x) x.Columns(0).DisplayedText).Where(Function(x) x.Count() > 1).SelectMany(Function(x) x.Skip(1)).[Select](Function(x) x.Columns(0).Row).ToList() For i As Integer = 0 To duplicatedRows.Count - 1 sheet.DeleteRow(duplicatedRows(i) - i) Next workbook.SaveToFile("Output.xlsx", ExcelVersion.Version2013) End Sub End Class End Namespace
Output: