最近在研究 Word 插件开发,把读取 Word 内容时碰到的棘手问题一一记录下来。下面是按页读取 Word 内容的代码:
' Reads the active Word document one page at a time, converts each page's XML
' to HTML via paraseXml, and writes it to the WebBrowser on myform.
Dim document As Document = Globals.ThisAddIn.Application.ActiveDocument
Dim application As Microsoft.Office.Interop.Word.Application = Globals.ThisAddIn.Application
' Page count taken from the built-in document properties.
Dim numnber As Integer = document.BuiltInDocumentProperties(WdBuiltInProperty.wdPropertyPages).value
' Alternative way to obtain the page count:
'Dim numnber As Integer= document.ActiveWindow.Panes(1).Pages.Count
Dim objWhat = Word.WdGoToItem.wdGoToPage
Dim objWhich = Word.WdGoToDirection.wdGoToAbsolute
Dim range1 As Word.Range
Dim range2 As Word.Range
For nIndex = 1 To numnber
    ' range1 = start of page nIndex, range2 = start of the following page.
    range1 = document.GoTo(objWhat, objWhich, nIndex)
    range2 = range1.GoToNext(Word.WdGoToItem.wdGoToPage)
    Dim startIndex = range1.Start
    Dim endIndex = range2.Start
    If startIndex = endIndex Then
        ' GoToNext did not move, so there is no following page: the page runs
        ' to the end of the document. Content.End is a range offset, the same
        ' unit as Range.Start, so it is used instead of a character count.
        endIndex = document.Content.End
    End If
    ' Fetch the page XML once; it is needed for both rendering and debug output.
    Dim pageXml As String = document.Range(startIndex, endIndex).XML
    ' Parse the content read from Word into HTML and show it in the WebBrowser
    ' (the parsing step itself is omitted here).
    myform.WebBrowser1.Document.Write("<!DOCTYPE html> <html lang=""en"" xmlns=""http://www.w3.org/1999/xhtml""> <head> <meta charset=""utf-8""> <title>况客科技</title> </head> <body>" & paraseXml(pageXml) & "<br/>----这是下一页的内容---<br/>" & "</body> </html>")
    myform.WebBrowser1.Refresh()
    Debug.Print(pageXml)
    Debug.Print("============")
Next
改良版的方案:
1、先将 Word 文档转为 WordOpenXML
' Obtain the document content as WordOpenXML (Flat OPC) text.
Dim xml As String = ""
Dim startPos = currentSelection.Start
Dim endPos = currentSelection.End
' Option A: only the current selection.
xml = document.Range(startPos, endPos).WordOpenXML
' Option B: the whole document. NOTE: this assignment overwrites Option A;
' comment out whichever of the two you do not need.
xml = document.Content.WordOpenXML
2、将 XML 转为 HTML(需要引用 Aspose.Words)
' Convert the WordOpenXML text produced in step 1 (variable xml) to an HTML
' string using Aspose.Words. The result is left in body.
Dim body As String
' Put the XML into a stream so Aspose.Words can load it as a document.
Using sr As New MemoryStream(Encoding.UTF8.GetBytes(xml))
    Dim doc As New Aspose.Words.Document(sr)
    Dim saveOptions As New Aspose.Words.Saving.HtmlSaveOptions()
    saveOptions.SaveFormat = Aspose.Words.SaveFormat.Html ' save format is HTML
    saveOptions.ExportImagesAsBase64 = True ' embed images as base64
    ' Using guarantees both streams are released even if Save throws.
    Using stream As New MemoryStream()
        doc.Save(stream, saveOptions)
        body = System.Text.Encoding.UTF8.GetString(stream.ToArray())
    End Using
End Using
更多推荐
vb分页读取word内容
发布评论