How do I find and remove duplicate rows from a worksheet using spire.xls?
Also,how do I remove extra whitespace from cells that have them?
Workbook workBook = new Workbook();
workBook.LoadFromFile("input.xlsx");
Worksheet sheet = workBook.Worksheets[0];
for (int i = 0; i < sheet.Columns[0].Count - 1; i++)
{
for (int j = i + 1; j < sheet.Columns[0].Count; j++)
{
if (sheet.Columns[0].CellList[i].Value == sheet.Columns[0].CellList[j].Value)
{
for (int col = 0; col < sheet.Rows[0].Count; col++)
{
if (col == sheet.Rows[0].Count - 1 && sheet.Rows[i].CellList[col].Value == sheet.Rows[j].CellList[col].Value)
{
sheet.DeleteRow(j + 1);
j--;
}
else if (col != sheet.Rows[0].Count - 1 && sheet.Rows[i].CellList[col].Value == sheet.Rows[j].CellList[col].Value)
{
continue;
}
else
{
break;
}
}
}
}
workBook.SaveToFile("output.xlsx", FileFormat.Version2016);
for (int i = 1; i <= sheet.LastRow; i++)
{
for (int j = 1; j <= sheet.LastColumn; j++)
{
CellRange range = sheet.Range[i, j];
if (range.IsBlank)
{
sheet.DeleteRange(range, DeleteOption.MoveUp);
}
}
}
Do you want to remove the blank cells from sheet?
Workbook workBook = new Workbook();
workBook.LoadFromFile("input.xlsx");
Worksheet sheet = workBook.Worksheets[0];
for (int i = 1; i <= sheet.LastRow; i++)
{
for (int j = 1; j <= sheet.LastColumn; j++)
{
CellRange range = sheet.Range[i, j];
String str = range.Value;
String strings = str.Trim();
range.Value = strings;
}
}
workBook.SaveToFile("output.xlsx",FileFormat.Version2016);
targetWorkbook.LoadFromFile(filePath);
targetSheet = targetWorkbook.Worksheets[0];
var cellRanges = targetSheet.Range["A2:G" + targetSheet.Columns[0].Count];
RemoveDuplicate(targetSheet, cellRanges);
private void RemoveDuplicate(Worksheet sheet, CellRange cellRanges)
{
// get the duplicated row numbers by 1st column data
var duplicatedRows = cellRanges.Rows
.GroupBy(x=> x.Columns[0].Value)
.Where(x=> x.Count() > 1)
.Select(x=> x.Last().Columns[0].Row)
.ToList();
foreach (var element in duplicatedRows)
{
sheet.DeleteRow(element);
}
}
static void Main(string[] args)
{
Workbook workbook = new Workbook();
workbook.LoadFromFile(filePath);
Worksheet sheet = workbook.Worksheets[0];
var rangs = sheet.Range[2, 1, sheet.LastRow, sheet.LastColumn];
RemoveDuplicate(sheet,rangs);
workbook.SaveToFile("out.xlsx");
}
private static void RemoveDuplicate(Worksheet sheet, CellRange cellRanges)
{
// get the duplicated row numbers
var duplicatedRows = cellRanges.Rows
.GroupBy(x => x.Columns[0].DisplayedText)
.Where(x => x.Count() > 1)
.SelectMany(x => x.Skip(1))
.Select(x => x.Columns[0].Row)
.ToList();
for (int i = 0; i < duplicatedRows.Count; i++)
{
sheet.DeleteRow(duplicatedRows[i] - i);
}
}
var excel = new Excel.Application();
var workbook = excel.Workbooks.Open(@"C:\Test\Test.xlsx");
Excel.Worksheet worksheet = workbook.Sheets[1];
var usedRange = worksheet.UsedRange;
usedRange.RemoveDuplicates(1);
workbook.Close(true);
Marshal.ReleaseComObject(excel);