I am using Spire.Pdf for .NET 10.2.2.0 to extract text from tables in a PDF file.
The problem is that the PDF contains text that belongs to some kind of background layer, as shown below:
So when I extract the text from a cell, the result also includes the background layer's characters "열", "람" and "용".
How should I change my code so that the extracted cell text excludes this background text?
// Walk every page of the document and read the text of each table cell.
PdfTableExtractor _tableExtractor = new PdfTableExtractor(doc);
PdfTable[] _tableList = null;
string _title = string.Empty;

for (int pageIndex = 0; pageIndex < doc.Pages.Count; pageIndex++)
{
    // Tables the extractor reports for the current page.
    _tableList = _tableExtractor.ExtractTable(pageIndex);
    int _tableNumber = 0;

    // Skip pages for which no tables were returned.
    if (_tableList == null || _tableList.Length == 0)
    {
        continue;
    }

    foreach (PdfTable currentTable in _tableList)
    {
        int rowCount = currentTable.GetRowCount();
        int columnCount = currentTable.GetColumnCount();

        for (int rowIndex = 0; rowIndex < rowCount; rowIndex++)
        {
            for (int columnIndex = 0; columnIndex < columnCount; columnIndex++)
            {
                // Text of the cell at (rowIndex, columnIndex); not used further in this excerpt.
                string cellText = currentTable.GetText(rowIndex, columnIndex);
            }
        }
    }
}