WebElementから指定したタグのみ抽出するのは以下のコードで出来ます。
FindElementsByTag("H1") FindElementsByTag("H2") FindElementsByTag("H3")
H1だけ、H2だけ、H3だけのタグは抽出できますが、出てきた順に階層化することはできません。
FindElementsByTagでOR条件を使う事ができれば解決するのですが・・・
OR条件が使える Xpathを使う
以下のXpathを指定すると FindElementsByXpath でコレクションが取得できるので、For Eachで出てきた順にタグを取得することができます。
//*[ self::h1 and not(.=preceding::h1)
or self::h2 and not(.=preceding::h2)
or self::h3 and not(.=preceding::h3) ]
実際のコーディング例
' Heading outline via a single XPath query.
'
' Builds the predicate
'   self::h1 and not(.=preceding::h1) or self::h2 and not(.=preceding::h2) ...
' for levels 1..iLevel and asks objWeb for every matching element, so the
' headings come back in the order the query returns them.
' Because of the not(.=preceding::hN) term, a heading whose text equals an
' earlier heading of the same level is left out.
'
' objWeb : object exposing FindElementsByXPath (Main passes the Selenium driver).
' iLevel : deepest heading level to include. Declared As String for
'          compatibility with existing callers; converted to a number here.
'          Values above 6 are capped at 6; values below 1 yield "".
' Returns: one "tagname<TAB>text" entry per heading, separated by vbCrLf with
'          no trailing line break, or "" when nothing matched.
Function GetHeadingList(objWeb As Object, iLevel As String) As String
    Dim i As Long
    Dim lMax As Long
    Dim sXpath As String
    Dim sRtn As String
    Dim elmLoop As WebElement

    ' Explicit conversion instead of relying on implicit String -> number
    ' coercion in the For statement.
    lMax = CLng(iLevel)
    ' With no level at all the predicate would be empty ("//*[]"), which is
    ' not a valid XPath expression.
    If lMax < 1 Then Exit Function
    If lMax > 6 Then lMax = 6

    For i = 1 To lMax
        If i > 1 Then sXpath = sXpath & " or "
        sXpath = sXpath & "self::h" & i & " and not(.=preceding::h" & i & ")"
    Next

    For Each elmLoop In objWeb.FindElementsByXPath("//*[" & sXpath & "]")
        ' Strip CR/LF from the heading text so each heading stays on one line.
        sRtn = sRtn & elmLoop.tagname & vbTab & Replace(Replace(elmLoop.Text, vbCr, ""), vbLf, "") & vbCrLf
    Next

    ' Drop the trailing vbCrLf. Guarded because Left() raises run-time error 5
    ' when given a negative length, which happens if no heading was found.
    If Len(sRtn) > 0 Then sRtn = Left$(sRtn, Len(sRtn) - Len(vbCrLf))
    GetHeadingList = sRtn
End Function
FindElementsByTagを使う
FindElementsByTagではなんと * が使えます。
* を指定すると全タグを順に取得できます。
ただし遅いです。
速度を気にせず、とにかく目的を達成できれば良いという時には有用な手段の1つになるかと思います。
以下コーディング例
' Heading outline by scanning every element (slow).
'
' Requests all elements with FindElementsByTag("*") and keeps those whose tag
' name is H1-H6 (compared case-insensitively).
'
' objWeb : object exposing FindElementsByTag (Main passes the Selenium driver).
' Returns: one "tagname<TAB>text" entry per heading, separated by vbCrLf with
'          no trailing line break, or "" when the page has no headings.
Function GetHeadingList2(objWeb As Object) As String
    Dim elmLoop As WebElement
    Dim sRtn As String

    For Each elmLoop In objWeb.FindElementsByTag("*")
        Select Case UCase$(elmLoop.tagname)
            Case "H1", "H2", "H3", "H4", "H5", "H6"
                ' Strip CR/LF from the heading text so each heading stays on one line.
                sRtn = sRtn & elmLoop.tagname & vbTab & Replace(Replace(elmLoop.Text, vbCr, ""), vbLf, "") & vbCrLf
        End Select
    Next

    ' Drop the trailing vbCrLf. Guarded because Left() raises run-time error 5
    ' when given a negative length, which happens if no heading was found.
    If Len(sRtn) > 0 Then sRtn = Left$(sRtn, Len(sRtn) - Len(vbCrLf))
    GetHeadingList2 = sRtn
End Function
使用方法
' Demo: prints the heading outline of the SeleniumBasic project page twice,
' once per extraction method, separated by a line of 50 dashes.
Sub Main()
    Dim driver As New PhantomJSDriver
    Dim i As Integer

    ' Emit 200 blank lines so earlier Immediate-window output scrolls away.
    For i = 1 To 200: Debug.Print: Next

    driver.Get "https://florentbr.github.io/SeleniumBasic/"

    Debug.Print GetHeadingList(driver, 4)
    Debug.Print String(50, "-")
    Debug.Print GetHeadingList2(driver)

    ' End the browser session explicitly instead of relying on object teardown.
    driver.Quit
End Sub
実行結果
h1	Seleniumbasic
h2	A Selenium based browser automation framework for VB.Net, VBA and VBScript
h3	Description
h3	Download
h3	Bug tracker
h3	Third party software
h3	Tested environments
h3	Authors and Contributors
--------------------------------------------------
h1	Seleniumbasic
h2	A Selenium based browser automation framework for VB.Net, VBA and VBScript
h3	Description
h3	Download
h3	Bug tracker
h3	Third party software
h3	Tested environments
h3	Authors and Contributors
全ソース
Option Explicit
' Demo: prints the heading outline of the SeleniumBasic project page twice,
' once per extraction method, separated by a line of 50 dashes.
Sub Main()
    Dim driver As New PhantomJSDriver
    Dim i As Integer

    ' Emit 200 blank lines so earlier Immediate-window output scrolls away.
    For i = 1 To 200: Debug.Print: Next

    driver.Get "https://florentbr.github.io/SeleniumBasic/"

    Debug.Print GetHeadingList(driver, 4)
    Debug.Print String(50, "-")
    Debug.Print GetHeadingList2(driver)

    ' End the browser session explicitly instead of relying on object teardown.
    driver.Quit
End Sub
' Heading outline via a single XPath query.
'
' Builds the predicate
'   self::h1 and not(.=preceding::h1) or self::h2 and not(.=preceding::h2) ...
' for levels 1..iLevel and asks objWeb for every matching element, so the
' headings come back in the order the query returns them.
' Because of the not(.=preceding::hN) term, a heading whose text equals an
' earlier heading of the same level is left out.
'
' objWeb : object exposing FindElementsByXPath (Main passes the Selenium driver).
' iLevel : deepest heading level to include. Declared As String for
'          compatibility with existing callers; converted to a number here.
'          Values above 6 are capped at 6; values below 1 yield "".
' Returns: one "tagname<TAB>text" entry per heading, separated by vbCrLf with
'          no trailing line break, or "" when nothing matched.
Function GetHeadingList(objWeb As Object, iLevel As String) As String
    Dim i As Long
    Dim lMax As Long
    Dim sXpath As String
    Dim sRtn As String
    Dim elmLoop As WebElement

    ' Explicit conversion instead of relying on implicit String -> number
    ' coercion in the For statement.
    lMax = CLng(iLevel)
    ' With no level at all the predicate would be empty ("//*[]"), which is
    ' not a valid XPath expression.
    If lMax < 1 Then Exit Function
    If lMax > 6 Then lMax = 6

    For i = 1 To lMax
        If i > 1 Then sXpath = sXpath & " or "
        sXpath = sXpath & "self::h" & i & " and not(.=preceding::h" & i & ")"
    Next

    For Each elmLoop In objWeb.FindElementsByXPath("//*[" & sXpath & "]")
        ' Strip CR/LF from the heading text so each heading stays on one line.
        sRtn = sRtn & elmLoop.tagname & vbTab & Replace(Replace(elmLoop.Text, vbCr, ""), vbLf, "") & vbCrLf
    Next

    ' Drop the trailing vbCrLf. Guarded because Left() raises run-time error 5
    ' when given a negative length, which happens if no heading was found.
    If Len(sRtn) > 0 Then sRtn = Left$(sRtn, Len(sRtn) - Len(vbCrLf))
    GetHeadingList = sRtn
End Function
' Heading outline by scanning every element (slow).
'
' Requests all elements with FindElementsByTag("*") and keeps those whose tag
' name is H1-H6 (compared case-insensitively).
'
' objWeb : object exposing FindElementsByTag (Main passes the Selenium driver).
' Returns: one "tagname<TAB>text" entry per heading, separated by vbCrLf with
'          no trailing line break, or "" when the page has no headings.
Function GetHeadingList2(objWeb As Object) As String
    Dim elmLoop As WebElement
    Dim sRtn As String

    For Each elmLoop In objWeb.FindElementsByTag("*")
        Select Case UCase$(elmLoop.tagname)
            Case "H1", "H2", "H3", "H4", "H5", "H6"
                ' Strip CR/LF from the heading text so each heading stays on one line.
                sRtn = sRtn & elmLoop.tagname & vbTab & Replace(Replace(elmLoop.Text, vbCr, ""), vbLf, "") & vbCrLf
        End Select
    Next

    ' Drop the trailing vbCrLf. Guarded because Left() raises run-time error 5
    ' when given a negative length, which happens if no heading was found.
    If Len(sRtn) > 0 Then sRtn = Left$(sRtn, Len(sRtn) - Len(vbCrLf))
    GetHeadingList2 = sRtn
End Function
コメント