How to split a Word document by page break in C#

With Spire.Doc for .NET, we can split a Word document not only by section but also by page break. We've already introduced how to split a Word document into multiple documents by section break. In this article, we'll learn how to split a Word document by page break with Spire.Doc for .NET.

Please view the following screenshot of the original Word document, which has two page breaks: one at the end of the first page and one at the end of the second page.

How to split a word document by page break in C#

Now follow the detailed steps below to split it into 3 separate documents at the page breaks.

Step 1: Create a word document and load the original word document.

//create an empty document object, then read the source file into it
var original = new Document();
original.LoadFromFile("New Zealand.docx");

Step 2: Create a new word document and add a section to it.

//output document that receives the copied content, with one section to hold it
var newWord = new Document();
var section = newWord.AddSection();

Step 3: Split the original word document into separate documents according to page break.

int index = 0;
//make sure the output folder exists before the first save
System.IO.Directory.CreateDirectory("result");
//traverse through all sections of original document
foreach (Section sec in original.Sections)
{
    //traverse through all body child objects of each section
    foreach (DocumentObject obj in sec.Body.ChildObjects)
    {
        if (obj is Paragraph)
        {
            Paragraph para = (Paragraph)obj;
            //index of the first child object of this paragraph that has not been copied yet
            int start = 0;
            for (int i = 0; i < para.ChildObjects.Count; i++)
            {
                Break pageBreak = para.ChildObjects[i] as Break;
                if (pageBreak == null || pageBreak.BreakType != BreakType.PageBreak)
                {
                    continue;
                }

                //copy the paragraph into the current document and keep only the
                //child objects in [start, i), i.e. the content in front of this page break
                Paragraph head = (Paragraph)para.Clone();
                section.Body.ChildObjects.Add(head);
                //trim from the end first so the lower indexes stay valid
                for (int k = head.ChildObjects.Count - 1; k >= i; k--)
                {
                    head.ChildObjects.RemoveAt(k);
                }
                for (int k = start - 1; k >= 0; k--)
                {
                    head.ChildObjects.RemoveAt(k);
                }

                //save the new document to a .docx file.
                newWord.SaveToFile(String.Format("result/out-{0}.docx", index), FileFormat.Docx);
                index++;

                //create a new document
                newWord = new Document();
                //add a section for document
                section = newWord.AddSection();
                //the next part of this paragraph starts right after the page break
                start = i + 1;
            }

            //start == 0: the paragraph has no page break, copy it unchanged.
            //start < Count: copy what follows the last page break.
            //Otherwise the paragraph ended with the page break, so nothing is copied
            //and the new document does not begin with a blank paragraph.
            if (start == 0 || start < para.ChildObjects.Count)
            {
                Paragraph tail = (Paragraph)para.Clone();
                section.Body.ChildObjects.Add(tail);
                //remove the child objects up to and including the last page break
                for (int k = start - 1; k >= 0; k--)
                {
                    tail.ChildObjects.RemoveAt(k);
                }
            }
        }
        else if (obj is Table)
        {
            //add table object in original section into section of new document
            section.Body.ChildObjects.Add(obj.Clone());
        }
    }
}
//save the content after the last page break to a .docx file
newWord.SaveToFile(String.Format("result/out-{0}.docx", index), FileFormat.Docx);

Output:

How to split a word document by page break in C#

Full code:

using System;
using Spire.Doc;
using Spire.Doc.Documents;

namespace Split_Word_Document_by_Page_Break
{
    /// <summary>
    /// Splits "New Zealand.docx" into one .docx file per page-break-delimited part,
    /// written as result/out-0.docx, result/out-1.docx, and so on.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Copies the paragraphs and tables of the source document into output documents,
        /// starting a new output document at every page break.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            Document original = new Document();
            original.LoadFromFile("New Zealand.docx");
            Document newWord = new Document();
            Section section = newWord.AddSection();

            //make sure the output folder exists before the first save
            System.IO.Directory.CreateDirectory("result");

            int index = 0;
            foreach (Section sec in original.Sections)
            {
                foreach (DocumentObject obj in sec.Body.ChildObjects)
                {
                    if (obj is Paragraph)
                    {
                        Paragraph para = (Paragraph)obj;
                        //index of the first child object of this paragraph that has not been copied yet
                        int start = 0;

                        for (int i = 0; i < para.ChildObjects.Count; i++)
                        {
                            Break pageBreak = para.ChildObjects[i] as Break;
                            if (pageBreak == null || pageBreak.BreakType != BreakType.PageBreak)
                            {
                                continue;
                            }

                            //copy the paragraph into the current document and keep only the
                            //child objects in [start, i), i.e. the content in front of this page break
                            Paragraph head = (Paragraph)para.Clone();
                            section.Body.ChildObjects.Add(head);
                            //trim from the end first so the lower indexes stay valid
                            for (int k = head.ChildObjects.Count - 1; k >= i; k--)
                            {
                                head.ChildObjects.RemoveAt(k);
                            }
                            for (int k = start - 1; k >= 0; k--)
                            {
                                head.ChildObjects.RemoveAt(k);
                            }

                            newWord.SaveToFile(String.Format("result/out-{0}.docx", index), FileFormat.Docx);
                            index++;

                            //continue in a fresh document, right after the page break
                            newWord = new Document();
                            section = newWord.AddSection();
                            start = i + 1;
                        }

                        //start == 0: the paragraph has no page break, copy it unchanged.
                        //start < Count: copy what follows the last page break.
                        //Otherwise the paragraph ended with the page break, so nothing is copied
                        //and the new document does not begin with a blank paragraph.
                        if (start == 0 || start < para.ChildObjects.Count)
                        {
                            Paragraph tail = (Paragraph)para.Clone();
                            section.Body.ChildObjects.Add(tail);
                            //remove the child objects up to and including the last page break
                            for (int k = start - 1; k >= 0; k--)
                            {
                                tail.ChildObjects.RemoveAt(k);
                            }
                        }
                    }
                    else if (obj is Table)
                    {
                        //tables are copied whole into the current document
                        section.Body.ChildObjects.Add(obj.Clone());
                    }
                }
            }
            //save the content after the last page break
            newWord.SaveToFile(String.Format("result/out-{0}.docx", index), FileFormat.Docx);
        }
    }
}