Tuesday, 28 December 2010 06:45

How to Traverse a Document Tree

Spire.Doc represents a document as a tree; every document element is a node of that tree. Some nodes, such as sections, paragraphs and tables, may have many child nodes. For example, a section node has several paragraph nodes, a paragraph node has many text nodes, and each row is a child node of a table node. Other nodes, such as text ranges, images and form fields, have no child nodes.
If a node has child nodes, it should be an instance of Spire.Doc.Interface.ICompositeObject.
If you want to operate on all the nodes, you can use a tree traversal method to visit each node.

Document Tree Traversal

The following example demonstrates how to traverse a document tree to collect all nodes and output the text of all text-range nodes.
[C#]
using System;
using System.Collections.Generic;
using Spire.Doc;
using Spire.Doc.Documents;
using Spire.Doc.Fields;
using Spire.Doc.Interface;
using Spire.Doc.Collections;

namespace ExtractText
{
    /// <summary>
    /// Sample program that walks every node of a Word document and prints
    /// the text of each text-range node to the console.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "Sample.doc", collects all of its nodes and writes the text
        /// of every node whose type is <c>DocumentObjectType.TextRange</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            //Open a word document.
            Document document = new Document("Sample.doc");
            IList<IDocumentObject> nodes = GetAllObjects(document);
            foreach (IDocumentObject node in nodes)
            {
                //Judge the object type.
                if (node.DocumentObjectType != DocumentObjectType.TextRange)
                {
                    continue;
                }

                //Guard the cast: 'as' yields null when the conversion fails,
                //so never dereference its result unchecked.
                TextRange textNode = node as TextRange;
                if (textNode != null)
                {
                    Console.WriteLine(textNode.Text);
                }
            }
        }

        /// <summary>
        /// Collects every descendant node of <paramref name="document"/> by
        /// visiting containers in first-in, first-out order.
        /// </summary>
        /// <param name="document">The document whose tree is traversed.</param>
        /// <returns>
        /// A list of all child objects found under the document. The document
        /// itself is not included in the list.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="document"/> is null.
        /// </exception>
        private static IList<IDocumentObject> GetAllObjects(Document document)
        {
            if (document == null)
            {
                throw new ArgumentNullException("document");
            }

            //Create a list.
            List<IDocumentObject> nodes = new List<IDocumentObject>();

            //Create a new queue of containers whose children still need visiting.
            Queue<ICompositeObject> containers = new Queue<ICompositeObject>();

            //Put the document object in the queue.
            containers.Enqueue(document);
            while (containers.Count > 0)
            {
                ICompositeObject container = containers.Dequeue();
                DocumentObjectCollection docObjects = container.ChildObjects;
                foreach (DocumentObject docObject in docObjects)
                {
                    nodes.Add(docObject);

                    //Judge the docObject: a single 'as' plus null check replaces
                    //the former 'is' followed by 'as' (two type tests).
                    ICompositeObject composite = docObject as ICompositeObject;
                    if (composite != null)
                    {
                        containers.Enqueue(composite);
                    }
                }
            }

            return nodes;
        }
    }
}
          
[VB.NET]
Imports System
Imports System.Collections.Generic
Imports Spire.Doc
Imports Spire.Doc.Documents
Imports Spire.Doc.Fields
Imports Spire.Doc.Interface
Imports Spire.Doc.Collections

Module Module1

    ''' <summary>
    ''' Loads "Sample.doc", collects all of its nodes and writes the text of
    ''' every node whose type is DocumentObjectType.TextRange to the console.
    ''' </summary>
    Sub Main()
        'Open a word document.
        Dim document As New Document("Sample.doc")

        'GetAllObjects returns a single list, so the variable must be declared
        'as IList(Of IDocumentObject), not as an array of such lists.
        Dim nodes As IList(Of IDocumentObject) = GetAllObjects(document)

        For Each node As IDocumentObject In nodes

            'Judge the object type.
            If node.DocumentObjectType = DocumentObjectType.TextRange Then
                'Explicit cast: an implicit narrowing conversion does not
                'compile under Option Strict On.
                Dim textNode As TextRange = TryCast(node, TextRange)
                If textNode IsNot Nothing Then
                    Console.WriteLine(textNode.Text)
                End If
            End If
        Next
    End Sub

    ''' <summary>
    ''' Collects every descendant node of the given document by visiting
    ''' containers in first-in, first-out order.
    ''' </summary>
    ''' <param name="document">The document whose tree is traversed.</param>
    ''' <returns>
    ''' A list of all child objects found under the document. The document
    ''' itself is not included in the list.
    ''' </returns>
    ''' <exception cref="ArgumentNullException">document is Nothing.</exception>
    Function GetAllObjects(ByVal document As Document) As IList(Of IDocumentObject)
        If document Is Nothing Then
            Throw New ArgumentNullException("document")
        End If

        'Create a list.
        Dim nodes As New List(Of IDocumentObject)()

        'Create a new queue of containers whose children still need visiting.
        Dim containers As New Queue(Of ICompositeObject)()

        'Put the document object in the queue.
        containers.Enqueue(document)
        While containers.Count > 0
            Dim container As ICompositeObject = containers.Dequeue()
            Dim docObjects As DocumentObjectCollection = container.ChildObjects
            For Each docObject As DocumentObject In docObjects
                nodes.Add(docObject)

                'Judge the docObject: one TryCast plus a Nothing check replaces
                'the former TypeOf test followed by TryCast.
                Dim composite As ICompositeObject = TryCast(docObject, ICompositeObject)
                If composite IsNot Nothing Then
                    containers.Enqueue(composite)
                End If
            Next
        End While

        Return nodes
    End Function
End Module
          
Published in Others