I'm considering using Spire.PDF to extract text, image, and graphic content from PDF files. I want to get the position and size of each piece of content on the PDF page.
How can I get the information?
I would be very grateful if you could provide some example code.
// Extracts the images and text of the first page of a PDF, saves each image
// as "Image-<index>.png", and writes one line per image / text fragment
// (content plus position and size) to "result.txt".
PdfDocument pdf = new PdfDocument();
try
{
    pdf.LoadFromFile(inputFile);
    var page = pdf.Pages[0];
    StringBuilder builder = new StringBuilder();

    // Read ImagesInfo once so the property is evaluated a single time
    // instead of on every access inside the loop.
    var imagesInfo = page.ImagesInfo;
    for (int i = 0; i < imagesInfo.Length; i++)
    {
        var bounds = imagesInfo[i].Bounds;

        //Get image location
        float x = bounds.Location.X;
        float y = bounds.Location.Y;
        //Get image size
        float width = bounds.Width;
        float height = bounds.Height;

        string imageFileName = string.Format("Image-{0}.png", i);

        // Dispose the image as soon as it has been written to disk so its
        // underlying resources are not held until finalization.
        using (Image image = imagesInfo[i].Image)
        {
            image.Save(imageFileName, ImageFormat.Png);
        }

        builder.AppendLine(string.Format(
            "{0}==x: {1} y: {2} width: {3} height: {4}",
            imageFileName, x, y, width, height));
    }

    PdfTextFindCollection collection = page.FindAllText();
    foreach (PdfTextFind find in collection.Finds)
    {
        //Get text position
        PointF point = find.Position;
        //Get text content
        string str = find.MatchText;
        //Get text size
        SizeF size = find.Size;
        builder.AppendLine(str + "==" + point.ToString() + size.ToString());
    }

    File.WriteAllText("result.txt", builder.ToString());
}
finally
{
    // Release the document's resources even if extraction fails part-way.
    pdf.Close();
}
// Searches every page of a PDF for a piece of text, allowing the match to
// span line breaks (TextFindParameter.CrossLine), and reads the position(s)
// of each match.
// NOTE: "inputFile" and "searchText " are placeholder literals; the trailing
// space in "searchText " is part of the string passed to FindText.
PdfDocument pdf = new PdfDocument();
PdfTextFind[] result = null;
try
{
    pdf.LoadFromFile("inputFile");
    foreach (PdfPageBase page in pdf.Pages)
    {
        //Find text
        result = page.FindText("searchText ", TextFindParameter.CrossLine).Finds;
        foreach (PdfTextFind find in result)
        {
            //Get the position coordinates of text that does not cross lines
            PointF pointF = find.Position;
            //Get the position coordinates of text across lines
            List<PointF> pointFs = find.Positions;
        }
    }
}
finally
{
    // Release the document's resources even if the search throws.
    pdf.Close();
}