Hello,
Thanks for your inquiry.
Please note that an MS Word document is a flow document and does not contain any information about its layout into lines and pages. Therefore, technically there is no “Page” or “Line” concept in a Word document. Our Spire.Doc product is based on the MS Word specification, so it is not possible to determine which page a paragraph is on.
Please refer to the following code, which checks the font, size, and color of all content in the entire document, gets the page width settings (left and right margins) in cm, and checks whether each URL is valid. I have attached my test file too.
- Code: Select all
/// <summary>
/// Sample that inspects a Word document with Spire.Doc: it collects the font
/// name, size and color of heading and body text, the left/right page margins
/// of every section (in centimeters), and every URL found in the document,
/// then probes each URL with an HTTP HEAD request.
/// </summary>
class Program
{
    // Matches "scheme://..." up to the next whitespace character.
    // The scheme class is [a-zA-Z]; the previous "A-z" range also matched
    // the punctuation characters that sit between 'Z' and 'a' in ASCII.
    private const string UrlPattern = "[a-zA-Z]+://[^\\s]*";

    // Timeout for a single URL probe, in milliseconds.
    private const int RequestTimeoutMs = 10000;

    // Unit conversion: 72 pt = 1 inch = 2.54 cm.
    private const float CentimetersPerInch = 2.54f;
    private const float PointsPerInch = 72f;

    /// <summary>
    /// Loads the document, gathers font, margin and URL information, and
    /// stores a validity message on every collected <c>UrlInfo</c>.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        Document doc = new Document();
        doc.LoadFromFile(@"E:\testdoc\CheckWord.docx");

        List<FontInfo> headInfos = new List<FontInfo>();
        List<FontInfo> paraInfos = new List<FontInfo>();
        List<PageInfo> pageInfos = new List<PageInfo>();
        List<UrlInfo> urlInfos = new List<UrlInfo>();

        foreach (Section sec in doc.Sections)
        {
            // Left and right page margins, converted from points to centimeters.
            PageInfo pageInfo = new PageInfo();
            pageInfo.Left = sec.PageSetup.Margins.Left * CentimetersPerInch / PointsPerInch;
            pageInfo.Right = sec.PageSetup.Margins.Right * CentimetersPerInch / PointsPerInch;
            pageInfos.Add(pageInfo);

            foreach (DocumentObject obj in sec.Body.ChildObjects)
            {
                if (obj.DocumentObjectType != DocumentObjectType.Paragraph)
                {
                    continue;
                }

                Paragraph para = obj as Paragraph;
                if (para == null)
                {
                    continue;
                }

                if (para.StyleName != null && para.StyleName.Contains("Heading"))
                {
                    // Title font, size, color.
                    CollectFontInfos(para, headInfos);
                }
                else if (!string.IsNullOrEmpty(para.Text))
                {
                    // Font, size, color of each non-empty body paragraph,
                    // plus the text of any hyperlink fields it contains.
                    CollectFontInfos(para, paraInfos);
                    CollectHyperlinks(para, urlInfos);
                }
            }
        }

        // URLs that appear as plain text anywhere in the document.
        Regex rgx = new Regex(UrlPattern);
        TextSelection[] textSelections = doc.FindAllPattern(rgx);

        // NOTE(review): guard in case FindAllPattern yields null when nothing
        // matches - confirm against the Spire.Doc API reference.
        if (textSelections != null)
        {
            foreach (TextSelection textSelection in textSelections)
            {
                TextRange textRange = textSelection.GetAsOneRange();
                UrlInfo urlInfo = new UrlInfo();
                urlInfo.Url = textRange.Text;
                urlInfos.Add(urlInfo);
            }
        }

        // Check whether each collected URL is valid.
        foreach (UrlInfo urlInfo in urlInfos)
        {
            urlInfo.Message = CheckUrl(urlInfo.Url);
        }
    }

    // Appends one FontInfo per TextRange child of the paragraph to target.
    private static void CollectFontInfos(Paragraph para, List<FontInfo> target)
    {
        foreach (DocumentObject child in para.ChildObjects)
        {
            // Filter on DocumentObjectType (as the sample always did) rather
            // than on the CLR type alone, so only plain text ranges are read.
            if (child.DocumentObjectType != DocumentObjectType.TextRange)
            {
                continue;
            }

            TextRange textRange = child as TextRange;
            if (textRange == null)
            {
                continue;
            }

            FontInfo info = new FontInfo();
            info.FontName = textRange.CharacterFormat.FontName;
            info.FontSize = textRange.CharacterFormat.FontSize;
            info.FontColor = textRange.CharacterFormat.TextColor;
            target.Add(info);
        }
    }

    // Appends one UrlInfo per hyperlink field of the paragraph to target,
    // using the field's FieldText as the URL.
    private static void CollectHyperlinks(Paragraph para, List<UrlInfo> target)
    {
        foreach (DocumentObject child in para.ChildObjects)
        {
            if (child.DocumentObjectType != DocumentObjectType.Field)
            {
                continue;
            }

            Field field = child as Field;
            if (field == null || field.Type != FieldType.FieldHyperlink)
            {
                continue;
            }

            UrlInfo urlInfo = new UrlInfo();
            urlInfo.Url = field.FieldText;
            target.Add(urlInfo);
        }
    }

    // Sends an HTTP HEAD request to url and returns a human-readable result:
    // "the url is valid" for a 200 response, otherwise a description of the
    // status or of the failure.
    private static string CheckUrl(string url)
    {
        HttpWebRequest req = null;
        try
        {
            req = WebRequest.CreateDefault(new Uri(url)) as HttpWebRequest;
            if (req == null)
            {
                // The pattern accepts any scheme, but only HTTP(S) can be probed here.
                return "the url does not use a supported (http/https) scheme";
            }

            req.Method = "HEAD";
            req.Timeout = RequestTimeoutMs;

            // Dispose the response so its connection is released before the
            // next URL is probed.
            using (HttpWebResponse res = (HttpWebResponse)req.GetResponse())
            {
                if (res.StatusCode == HttpStatusCode.OK)
                {
                    return "the url is valid";
                }

                return "the url responded with status " + (int)res.StatusCode + " (" + res.StatusDescription + ")";
            }
        }
        catch (Exception ex)
        {
            // Deliberately broad: a malformed URL, DNS failure, timeout or
            // HTTP error all mean "not valid", and the reason is reported.
            return ex.Message;
        }
        finally
        {
            if (req != null)
            {
                req.Abort();
            }
        }
    }
}
/// <summary>
/// Font attributes captured from a single text range of the document.
/// </summary>
public class FontInfo
{
    /// <summary>Name of the font applied to the text range.</summary>
    public string FontName { get; set; }

    /// <summary>Font size of the text range, as reported by its character format.</summary>
    public float FontSize { get; set; }

    /// <summary>Text color of the text range.</summary>
    public Color FontColor { get; set; }
}
/// <summary>
/// Left and right page margins recorded for one section of the document.
/// </summary>
public class PageInfo
{
    /// <summary>Left page margin.</summary>
    public float Left { get; set; }

    /// <summary>Right page margin.</summary>
    public float Right { get; set; }
}
/// <summary>
/// A URL found in the document together with the outcome of its validity check.
/// </summary>
public class UrlInfo
{
    /// <summary>The URL text as it was found in the document.</summary>
    public string Url { get; set; }

    /// <summary>Result of the validity check; null until the URL has been checked.</summary>
    public string Message { get; set; }
}
If the code does not meet your needs, please provide us with your input file and a sample of the expected result. Thanks in advance.
Sincerely,
Marcia
E-iceblue support team
Login to view the files attached to this post.