I'm trying to split particular pages out of a PDF based on title keywords present in the PDF text. For example: start at the page that has the title "TitleToStartSplitting", continue until a page with the title "TitleToStopSplitting" is found, then stop and generate the first PDF. Then do another split from the 2nd title to the 3rd title, add those pages to a separate PDF, and so on. We can't rely on page numbers or page counts, or split by any other logic, and we need to create new documents containing specific pages' content.
Any help will be much appreciated and thank you in advance
The code is written in C# on ASP.NET Core 5.
I am using the licensed version of Spire.
My attempt is based on an existing question in the Spire forum:
- Code: Select all
// Splits `pdf` into one output file per "TitleOne" section.
//
// Pass 1 scans the pages in order and records the index of every page that
// contains the start title. Scanning ends at the first page that contains any
// of the stop titles ("TitleTwo", "Title3", "title4"); that page index becomes
// the exclusive end of the last section.
// Pass 2 copies each section [start, nextStart) into its OWN new document and
// saves it as custom<i>.pdf.
//
// Assumes `pdf` is an already-loaded PdfDocument declared by the surrounding code.

// Page indexes at which a section starts, strictly increasing.
List<int> urlBorrower = new List<int>();

// Exclusive end of the last section; stays at the page count when no stop
// title is found anywhere in the document.
int stopIndex = pdf.Pages.Count;

for (int pageIndex = 0; pageIndex < pdf.Pages.Count; pageIndex++)
{
    PdfPageBase page = pdf.Pages[pageIndex];

    // Record the page once, no matter how many times the title matches on it;
    // duplicate indexes would otherwise produce empty sections.
    PdfTextFind[] findResult = page.FindText("TitleOne", TextFindParameter.CrossLine).Finds;
    if (findResult.Length > 0)
    {
        urlBorrower.Add(pageIndex);
    }

    var lender = page.FindText("TitleTwo", TextFindParameter.CrossLine).Finds;
    var title3 = page.FindText("Title3", TextFindParameter.CrossLine).Finds;
    var title4 = page.FindText("title4", TextFindParameter.CrossLine).Finds;
    if (title3.Length > 0 || title4.Length > 0 || lender.Length > 0)
    {
        // Remember where the stop title was found so the last section ends here
        // instead of running to the end of the document.
        stopIndex = pageIndex;
        break;
    }
}

for (int i = 0; i < urlBorrower.Count; i++)
{
    int currentIndex = urlBorrower[i];
    bool isLastSection = i == urlBorrower.Count - 1;
    int nextIndex = isLastSection ? stopIndex : urlBorrower[i + 1];

    // When the stop title sits on the same page as the start title the range
    // would be empty; a section always contains at least its own title page.
    if (nextIndex <= currentIndex)
    {
        nextIndex = currentIndex + 1;
    }

    // A fresh document per section: reusing a single document would make every
    // output file also contain the pages of all previous sections.
    PdfDocument newpdf = new PdfDocument();
    try
    {
        for (int j = currentIndex; j < nextIndex; j++)
        {
            PdfPageBase page = pdf.Pages[j];
            // Same size, zero margins, so the drawn template lands at the
            // original position on the new page.
            PdfPageBase newPage = newpdf.Pages.Add(page.ActualSize, new PdfMargins(0));
            page.CreateTemplate().Draw(newPage.Canvas, new PointF(0, 0));
        }
        newpdf.SaveToFile(@"custom" + i + ".pdf", FileFormat.PDF);
    }
    finally
    {
        // Release the document even when copying or saving throws.
        newpdf.Dispose();
    }
}