With Spire.Doc for .NET, we can split a Word document not only by section break but also by page break. We have already shown how to split a Word document into multiple documents by section break. In this article, we'll learn how to split a Word document by page break with Spire.Doc for .NET.
The following screenshot shows the original Word document, which has two page breaks: one at the end of the first page and one at the end of the second page.
Now follow the detailed steps below to split it into 3 separate documents at the page breaks.
Step 1: Create a word document and load the original word document.
// Source document: create an empty Document, then read the input file into it.
var original = new Document();
original.LoadFromFile("New Zealand.docx");
Step 2: Create a new word document and add a section to it.
// Target document that receives the content of the first part.
var newWord = new Document();

// Cloned body objects are appended to this section.
var section = newWord.AddSection();
Step 3: Split the original word document into separate documents according to page break.
// Make sure the output folder exists before the first save.
System.IO.Directory.CreateDirectory("result");

// Sequence number used in the name of the output file currently being filled.
int index = 0;

// Traverse all sections of the original document.
foreach (Section sec in original.Sections)
{
    // Traverse all body child objects of each section.
    foreach (DocumentObject obj in sec.Body.ChildObjects)
    {
        if (obj is Paragraph)
        {
            Paragraph para = (Paragraph)obj;

            // Add a clone of the paragraph to the current output document;
            // only the clone is edited, the original paragraph is left untouched.
            section.Body.ChildObjects.Add(para.Clone());
            Paragraph current = section.Body.LastParagraph;

            // Number of leading children of 'para' that were already written to
            // earlier output documents (and therefore are missing from 'current').
            int consumed = 0;

            for (int i = 0; i < para.ChildObjects.Count; i++)
            {
                Break pageBreak = para.ChildObjects[i] as Break;
                if (pageBreak == null || pageBreak.BreakType != BreakType.PageBreak)
                {
                    continue;
                }

                // Position of the page break inside the (possibly trimmed) clone.
                int breakPos = i - consumed;

                // Drop the page break and everything after it from the current
                // document; that content belongs to the following document(s).
                while (current.ChildObjects.Count > breakPos)
                {
                    current.ChildObjects.RemoveAt(current.ChildObjects.Count - 1);
                }

                // Save the finished part to a .docx file.
                newWord.SaveToFile(String.Format("result/out-{0}.docx", index), FileFormat.Docx);
                index++;

                // Start the next part: a new document with one section.
                newWord = new Document();
                section = newWord.AddSection();

                // Begin the new part with the remainder of this paragraph.
                section.Body.ChildObjects.Add(para.Clone());
                current = section.Body.LastParagraph;

                // Remove the page break and all child objects before it.
                for (int k = i; k >= 0; k--)
                {
                    current.ChildObjects.RemoveAt(k);
                }
                consumed = i + 1;

                if (current.ChildObjects.Count == 0)
                {
                    // Nothing followed the page break: remove the blank first
                    // paragraph (it is the only body child of the new section).
                    section.Body.ChildObjects.RemoveAt(0);
                }
            }
        }
        else if (obj is Table)
        {
            // Add a clone of the table to the current output document.
            section.Body.ChildObjects.Add(obj.Clone());
        }
    }
}

// Save the last part to a .docx file.
newWord.SaveToFile(String.Format("result/out-{0}.docx", index), FileFormat.Docx);
Output:
Full code:
using System;
using System.IO;
using Spire.Doc;
using Spire.Doc.Documents;

namespace Split_Word_Document_by_Page_Break
{
    /// <summary>
    /// Splits "New Zealand.docx" into several documents, starting a new output
    /// file at every page break found directly inside a body paragraph.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads the source document, copies its paragraphs and tables into output
        /// documents and writes them as result/out-0.docx, result/out-1.docx, ...
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            Document original = new Document();
            original.LoadFromFile("New Zealand.docx");

            // Make sure the output folder exists before the first save.
            Directory.CreateDirectory("result");

            // Output document currently being filled, and the section receiving content.
            Document newWord = new Document();
            Section section = newWord.AddSection();

            // Sequence number used in the name of the current output file.
            int index = 0;

            foreach (Section sec in original.Sections)
            {
                foreach (DocumentObject obj in sec.Body.ChildObjects)
                {
                    if (obj is Paragraph)
                    {
                        Paragraph para = (Paragraph)obj;

                        // Only the clone is edited; the original paragraph is left untouched.
                        section.Body.ChildObjects.Add(para.Clone());
                        Paragraph current = section.Body.LastParagraph;

                        // Number of leading children of 'para' already written to earlier
                        // output documents (and therefore missing from 'current').
                        int consumed = 0;

                        for (int i = 0; i < para.ChildObjects.Count; i++)
                        {
                            Break pageBreak = para.ChildObjects[i] as Break;
                            if (pageBreak == null || pageBreak.BreakType != BreakType.PageBreak)
                            {
                                continue;
                            }

                            // Position of the page break inside the (possibly trimmed) clone.
                            int breakPos = i - consumed;

                            // Drop the page break and everything after it; that content
                            // belongs to the following document(s).
                            while (current.ChildObjects.Count > breakPos)
                            {
                                current.ChildObjects.RemoveAt(current.ChildObjects.Count - 1);
                            }

                            SavePart(newWord, index);
                            index++;

                            // Start the next part with the remainder of this paragraph.
                            newWord = new Document();
                            section = newWord.AddSection();
                            section.Body.ChildObjects.Add(para.Clone());
                            current = section.Body.LastParagraph;

                            // Remove the page break and all child objects before it.
                            for (int k = i; k >= 0; k--)
                            {
                                current.ChildObjects.RemoveAt(k);
                            }
                            consumed = i + 1;

                            if (current.ChildObjects.Count == 0)
                            {
                                // Nothing followed the page break: remove the blank first
                                // paragraph (the only body child of the new section).
                                section.Body.ChildObjects.RemoveAt(0);
                            }
                        }
                    }
                    else if (obj is Table)
                    {
                        section.Body.ChildObjects.Add(obj.Clone());
                    }
                }
            }

            // Save whatever was collected after the last page break.
            SavePart(newWord, index);
        }

        /// <summary>
        /// Writes one output document to result/out-{index}.docx in Docx format.
        /// </summary>
        /// <param name="part">The document to save.</param>
        /// <param name="index">Zero-based sequence number used in the file name.</param>
        private static void SavePart(Document part, int index)
        {
            part.SaveToFile(String.Format("result/out-{0}.docx", index), FileFormat.Docx);
        }
    }
}