Hi Team,
We need to read and extract the embedded attachments (.docx, .pdf, etc.) from an MS Word file. Please let us know whether this is possible with the Spire product.
Thanks.
// Extracts the embedded OLE attachments (PDF, Excel, PowerPoint, Word) found in
// the body paragraphs of a Word document and writes each one to disk.
//
// The first attachment of a given type keeps the fixed file name (for example
// "Result.pdf"); every further attachment of the same type gets a numeric
// suffix ("Result_1.pdf", "Result_2.pdf", ...) so that a document holding
// several attachments of one type no longer overwrites the earlier files.
Document doc = new Document();
//Load file from disk
doc.LoadFromFile("filePath");

// Number of attachments already written, per recognised type.
int pdfCount = 0;
int excelCount = 0;
int powerPointCount = 0;
int wordCount = 0;

//Traverse through all sections of the word document
foreach (Section sec in doc.Sections)
{
    //Traverse through all Child Objects in the body of each section
    foreach (DocumentObject obj in sec.Body.ChildObjects)
    {
        //Only paragraphs are inspected; any other body object is skipped
        Paragraph par = obj as Paragraph;
        if (par == null)
        {
            continue;
        }

        foreach (DocumentObject o in par.ChildObjects)
        {
            //Check whether the object is OLE
            if (o.DocumentObjectType != DocumentObjectType.OleObject)
            {
                continue;
            }

            // Guard the cast instead of dereferencing a possibly-null result.
            DocOleObject ole = o as DocOleObject;
            if (ole == null)
            {
                continue;
            }

            // File.WriteAllBytes throws on a null buffer, so skip objects without data.
            byte[] data = ole.NativeData;
            if (data == null)
            {
                continue;
            }

            string baseName;
            string extension;
            int ordinal;
            switch (ole.ObjectType)
            {
                //"AcroExch.Document.DC" means it's a PDF document
                case "AcroExch.Document.DC":
                    baseName = "Result";
                    extension = ".pdf";
                    ordinal = pdfCount++;
                    break;
                //"Excel.Sheet.12" means it's an Excel workbook
                case "Excel.Sheet.12":
                    baseName = "Result";
                    extension = ".xlsx";
                    ordinal = excelCount++;
                    break;
                //"PowerPoint.Show.12" means it's a PowerPoint file
                case "PowerPoint.Show.12":
                    baseName = "PPTResult";
                    extension = ".pptx";
                    ordinal = powerPointCount++;
                    break;
                //"Word.Document.12" means it's a Word document
                case "Word.Document.12":
                    baseName = "WordResult";
                    extension = ".docx";
                    ordinal = wordCount++;
                    break;
                //Any other OLE type is ignored, as before
                default:
                    continue;
            }

            // First attachment of a type keeps the original name; later ones are numbered.
            string outputPath = ordinal == 0
                ? baseName + extension
                : baseName + "_" + ordinal + extension;
            File.WriteAllBytes(outputPath, data);
        }
    }
}
...
//Check whether the object type is "Package"
// NOTE(review): this is one branch of a longer if/else chain; the declarations
// of `type` and `ole` are outside this excerpt.
else if ("Package".equals(type)){
//Get file name
String fileName = ole.getPackageFileName();
// Extension = everything from the last "." onward (dot included).
// NOTE(review): lastIndexOf returns -1 when the name has no ".", which makes
// substring throw StringIndexOutOfBoundsException - confirm names always have one.
String extension = fileName.substring(fileName.lastIndexOf("."));
// NOTE(review): the ".exe" branch is empty, so such packages are silently
// skipped; the comparison is also case-sensitive (".EXE" would not match).
if (extension.equals(".exe")){
}
else if (extension.equals(".dll")){
// Write the package's raw bytes to a fixed file name in the working directory.
byte[] bytes = ole.getNativeData();
Files.write(Paths.get("extract.dll"),bytes);
}
}
...
// Saves every picture found in the paragraphs of "sample.docx" as a PNG named
// image_<n>.png, skipping pictures that are the OlePicture of an OLE object.
Document document = new Document(@"sample.docx");
List<DocPicture> olePictures = new List<DocPicture>();
int index = 0;

// Pass 1: remember the picture attached to each OLE object so it can be excluded later.
foreach (Section sec in document.Sections)
{
    foreach (Paragraph para in sec.Paragraphs)
    {
        foreach (DocumentObject child in para.ChildObjects)
        {
            if (child.DocumentObjectType != DocumentObjectType.OleObject)
            {
                continue;
            }

            DocOleObject oleObject = child as DocOleObject;
            olePictures.Add(oleObject.OlePicture);
        }
    }
}

// Pass 2: export each remaining picture, numbering the output files from 0.
foreach (Section sec in document.Sections)
{
    foreach (Paragraph para in sec.Paragraphs)
    {
        foreach (DocumentObject child in para.ChildObjects)
        {
            if (child.DocumentObjectType != DocumentObjectType.Picture)
            {
                continue;
            }

            DocPicture candidate = child as DocPicture;

            // A picture that matches any collected OLE picture is not exported.
            bool belongsToOle = false;
            foreach (DocPicture known in olePictures)
            {
                if (candidate == known)
                {
                    belongsToOle = true;
                    break;
                }
            }

            if (belongsToOle)
            {
                continue;
            }

            string fileName = string.Format("image_{0}.png", index);
            candidate.Image.Save(fileName, System.Drawing.Imaging.ImageFormat.Png);
            index += 1;
        }
    }
}