■下準備:参照設定しておくこと
■サンプルコード Basic認証なしの場合
Sub HTTPリクエスト() Dim httpReq As XMLHTTP60 Set httpReq = New XMLHTTP60 httpReq.Open "GET", "https://www.au.com/mobile/area/5g/station/" httpReq.Send 'Do While httpReq.readyState < 4 ' DoEvents 'Loop Do While (httpReq.readyState < 4) And (httpReq.Status <> 200) DoEvents Loop Dim htmlDoc As Object Set htmlDoc = New HTMLDocument htmlDoc.write httpReq.responseText MsgBox "スクレイピング開始" Dim elementLength As Long elementLength = htmlDoc.all.Length - 1 Dim wb As Workbook Dim strPath As String strPath = "C:\Users\cryst\OneDrive\デスクトップ\ExcelVBAプロジェクト\webスクレイピング\test.xlsx" 'Set wb = Workbooks.Add ' 'wb.SaveAs strPath Dim i As Long Dim stationName As String 'For i = 1023 To htmlDoc.all.Length - 1 'For i = 472 To 2000 For i = 472 To elementLength ' Debug.Print htmlDoc.all(i).tagname ' If i = 1023 Then ' Stop ' End If If htmlDoc.all(i).tagname = "H3" Then Debug.Print i & "番" & htmlDoc.all(i).innerText End If ' If htmlDoc.all(i).tagname = "TR" And htmlDoc.all(i).tagname <> "" Then ' Debug.Print i & "番" & htmlDoc.all(i).innerText ' Debug.Print htmlDoc.all(i).className ' End If If htmlDoc.all(i).tagname = "TD" And htmlDoc.all(i).tagname <> "" Then stationName = htmlDoc.all(i).innerText Debug.Print i & "番" & htmlDoc.all(i).innerText 'Debug.Print htmlDoc.all(i).className End If If htmlDoc.all(i).tagname = "P" Then Debug.Print i & "番" & htmlDoc.all(i).className End If ' If htmlDoc.all(i).innerText = "改札付" Then ' Debug.Print i & "番" & htmlDoc.all(i).innerText ' End If If htmlDoc.all(i).tagname = "SPAN" Then Debug.Print i & "番" & htmlDoc.all(i).className End If ' If InStr(htmlDoc.all(i).innerText, "route") > 0 Or InStr(htmlDoc.all(i).innerText, "map") > 0 Then ' Debug.Print htmlDoc.all(i).tagname ' Debug.Print htmlDoc.all(i).className ' End If Next 'Debug.Print htmlDoc.Title 'Debug.Print htmlDoc.all(1023).innerText 'Debug.Print htmlDoc.all(1023).tagname 'Debug.Print htmlDoc.getElementsByTagName("TR")(0).innerText Set htmlDoc = Nothing Set httpReq = Nothing MsgBox "スクレイピング完了" End Sub
■サンプルコードBasic認証ありの場合
※ページ一番下にあるコード「 Function EncodeBase64Str」でログインIDとPWをBase64エンコードする必要がある
Sub Basic認証ありHTTPリクエスト() '--------------------------------------------------- '機能:Basic認証ありのHPよりWebスクレイピングする '参照設定:以下を参照設定すること ' Microsoft XML v6.0 ' Microsoft HTML Object Library '-------------------------------------------------- Dim httpReq As XMLHTTP60 Set httpReq = New XMLHTTP60 '------------------------------------------------ '★URLとログインIDとログインPWを設定 Dim URL As String Dim UserName As String Dim Password As String URL = "https://www.test.com/" UserName = "A12345" Password = "abc123" '★URLを送信 httpReq.Open "GET", URL, False '----------------------------------------------------- '★Authorizationヘッダーでユーザー名とパスワード送信 httpReq.setRequestHeader "Authorization", "Basic " & EncodeBase64Str(UserName & ":" & Password) 'キャッシュ対策 httpReq.setRequestHeader "Pragma", "no-cache" httpReq.setRequestHeader "Cache-Control", "no-cache" httpReq.setRequestHeader "If-Modified-Since", "Thu, 01 Jun 1970 00:00:00 GMT" '★HTTPリクエストをSend httpReq.Send '---------------------------------------------------- '★HTTPリクエストが完了するまでループ Do While (httpReq.readyState < 4) And (httpReq.Status <> 200) DoEvents Loop '---------------------------------------------------- '★レスポンスのあったテキストをHTMLドキュメント型に変換 Dim htmlDoc As Object Set htmlDoc = New HTMLDocument htmlDoc.write httpReq.responseText 'HTMLドキュメント型に変換した文字列の書込み速度が間に合わないため 'メッセージBoxを挟むことで速度調整 MsgBox "スクレイピング開始" Dim elementLength As Long elementLength = htmlDoc.all.Length - 1 Dim wb As Workbook Dim strPath As String strPath = "C:\Users\ユーザ名\デスクトップ\test.xlsx" 'Set wb = Workbooks.Add ' 'wb.SaveAs strPath Dim i As Long Dim stationName As String 'For i = 1023 To htmlDoc.all.Length - 1 'For i = 472 To 2000 For i = 472 To elementLength ' Debug.Print htmlDoc.all(i).tagname ' If i = 1023 Then ' Stop ' End If If htmlDoc.all(i).tagname = "H3" Then Debug.Print i & "番" & htmlDoc.all(i).innerText End If ' If htmlDoc.all(i).tagname = "TR" And htmlDoc.all(i).tagname <> "" Then ' Debug.Print i & "番" & htmlDoc.all(i).innerText ' Debug.Print htmlDoc.all(i).className ' End If If htmlDoc.all(i).tagname = "TD" And htmlDoc.all(i).tagname <> "" Then stationName = htmlDoc.all(i).innerText Debug.Print i & "番" & htmlDoc.all(i).innerText 'Debug.Print htmlDoc.all(i).className End If If htmlDoc.all(i).tagname = "P" Then Debug.Print i & "番" & htmlDoc.all(i).className End If ' If htmlDoc.all(i).innerText = "改札付" Then ' Debug.Print i & "番" & htmlDoc.all(i).innerText ' End If If htmlDoc.all(i).tagname = "SPAN" Then Debug.Print i & "番" & htmlDoc.all(i).className End If ' If InStr(htmlDoc.all(i).innerText, "route") > 0 Or InStr(htmlDoc.all(i).innerText, "map") > 0 Then ' Debug.Print htmlDoc.all(i).tagname ' Debug.Print htmlDoc.all(i).className ' End If Next 'Debug.Print htmlDoc.Title 'Debug.Print htmlDoc.all(1023).innerText 'Debug.Print htmlDoc.all(1023).tagname 'Debug.Print htmlDoc.getElementsByTagName("TR")(0).innerText Set htmlDoc = Nothing Set httpReq = Nothing MsgBox "スクレイピング完了" End Sub
Private Function EncodeBase64Str(ByVal str As String) As String '文字列をBase64エンコード Dim ret As String Dim d() As Byte Const adTypeBinary = 1 Const adTypeText = 2 ret = "" '初期化 On Error Resume Next With CreateObject("ADODB.Stream") .Open .Type = adTypeText .Charset = "UTF-8" .WriteText str .Position = 0 .Type = adTypeBinary .Position = 3 d = .Read() .Close End With With CreateObject("MSXML2.DOMDocument").createElement("base64") .DataType = "bin.base64" .nodeTypedValue = d ret = .Text End With On Error GoTo 0 EncodeBase64Str = ret End Function
■参考URL
エクセルVBAでIEを使わずにHTMLドキュメントを取得する方法
エクセルVBAでHTTPリクエストをする最も簡単なプログラム
Option Explicit Private Declare PtrSafe Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As Long) Private Function EncodeBase64Str(ByVal str As String) As String '文字列をBase64エンコード Dim ret As String Dim d() As Byte Const adTypeBinary = 1 Const adTypeText = 2 ret = "" '初期化 On Error Resume Next With CreateObject("ADODB.Stream") .Open .Type = adTypeText .Charset = "UTF-8" .WriteText str .Position = 0 .Type = adTypeBinary .Position = 3 d = .Read() .Close End With With CreateObject("MSXML2.DOMDocument").createElement("base64") .DataType = "bin.base64" .nodeTypedValue = d ret = .Text End With On Error GoTo 0 EncodeBase64Str = ret End Function
Sub HTTPリクエスト() Dim httpReq As XMLHTTP60 Set httpReq = New XMLHTTP60 Dim URL As String Dim UserName As String Dim Password As String URL = "" UserName = "" Password = "" httpReq.Open "GET", URL, False '----------------------------------------------------- '★Authorizationヘッダーでユーザー名とパスワード送信 httpReq.setRequestHeader "Authorization", "Basic " & EncodeBase64Str(UserName & ":" & Password) 'キャッシュ対策 httpReq.setRequestHeader "Pragma", "no-cache" httpReq.setRequestHeader "Cache-Control", "no-cache" httpReq.setRequestHeader "If-Modified-Since", "Thu, 01 Jun 1970 00:00:00 GMT" httpReq.Send '---------------------------------------------------- 'httpReq.Open "GET", "https://www.au.com/mobile/area/5g/station/" 'httpReq.Send 'Do While httpReq.readyState < 4 ' DoEvents 'Loop Do While (httpReq.readyState < 4) And (httpReq.Status <> 200) DoEvents Loop Dim htmlDoc As Object Set htmlDoc = New HTMLDocument htmlDoc.write httpReq.responseText MsgBox "スクレイピング開始" Dim elementLength As Long elementLength = htmlDoc.all.Length - 1 Dim wb As Workbook Dim strPath As String strPath = "C:\Users\cryst\OneDrive\デスクトップ\ExcelVBAプロジェクト\webスクレイピング\test.xlsx" 'Set wb = Workbooks.Add ' 'wb.SaveAs strPath Dim i As Long Dim stationName As String 'For i = 1023 To htmlDoc.all.Length - 1 'For i = 472 To 2000 For i = 472 To elementLength ' Debug.Print htmlDoc.all(i).tagname ' If i = 1023 Then ' Stop ' End If If htmlDoc.all(i).tagname = "H3" Then Debug.Print i & "番" & htmlDoc.all(i).innerText End If ' If htmlDoc.all(i).tagname = "TR" And htmlDoc.all(i).tagname <> "" Then ' Debug.Print i & "番" & htmlDoc.all(i).innerText ' Debug.Print htmlDoc.all(i).className ' End If If htmlDoc.all(i).tagname = "TD" And htmlDoc.all(i).tagname <> "" Then stationName = htmlDoc.all(i).innerText Debug.Print i & "番" & htmlDoc.all(i).innerText 'Debug.Print htmlDoc.all(i).className End If If htmlDoc.all(i).tagname = "P" Then Debug.Print i & "番" & htmlDoc.all(i).className End If ' If htmlDoc.all(i).innerText = "改札付" Then ' Debug.Print i & "番" & htmlDoc.all(i).innerText ' End If If htmlDoc.all(i).tagname = "SPAN" Then Debug.Print i & "番" & htmlDoc.all(i).className End If ' If InStr(htmlDoc.all(i).innerText, "route") > 0 Or InStr(htmlDoc.all(i).innerText, "map") > 0 Then ' Debug.Print htmlDoc.all(i).tagname ' Debug.Print htmlDoc.all(i).className ' End If Next 'Debug.Print htmlDoc.Title 'Debug.Print htmlDoc.all(1023).innerText 'Debug.Print htmlDoc.all(1023).tagname 'Debug.Print htmlDoc.getElementsByTagName("TR")(0).innerText Set htmlDoc = Nothing Set httpReq = Nothing MsgBox "スクレイピング完了" End Sub
Sub HTTPリクエスト() Dim httpReq As XMLHTTP60 Set httpReq = New XMLHTTP60 httpReq.Open "GET", "https://www.au.com/mobile/area/5g/station/" httpReq.Send Do While httpReq.readyState < 4 DoEvents Loop Dim htmlDoc As Object Set htmlDoc = New HTMLDocument htmlDoc.write httpReq.responseText Dim i As Long 'For i = 1023 To htmlDoc.all.Length - 1 For i = 472 To 2000 ' Debug.Print htmlDoc.all(i).tagname ' If i = 1023 Then ' Stop ' End If If htmlDoc.all(i).tagname = "H3" Then Debug.Print i & "番" & htmlDoc.all(i).innerText End If ' If htmlDoc.all(i).tagname = "TR" And htmlDoc.all(i).tagname <> "" Then ' Debug.Print i & "番" & htmlDoc.all(i).innerText ' Debug.Print htmlDoc.all(i).className ' End If If htmlDoc.all(i).tagname = "TD" And htmlDoc.all(i).tagname <> "" Then Debug.Print i & "番" & htmlDoc.all(i).innerText 'Debug.Print htmlDoc.all(i).className End If If htmlDoc.all(i).tagname = "P" Then Debug.Print i & "番" & htmlDoc.all(i).className End If If htmlDoc.all(i).innerText = "改札付" Then Debug.Print i & "番" & htmlDoc.all(i).innerText End If If htmlDoc.all(i).tagname = "SPAN" Then Debug.Print i & "番" & htmlDoc.all(i).className End If ' If InStr(htmlDoc.all(i).innerText, "route") > 0 Or InStr(htmlDoc.all(i).innerText, "map") > 0 Then ' Debug.Print htmlDoc.all(i).tagname ' Debug.Print htmlDoc.all(i).className ' End If Next 'Debug.Print htmlDoc.Title 'Debug.Print htmlDoc.all(1023).innerText 'Debug.Print htmlDoc.all(1023).tagname 'Debug.Print htmlDoc.getElementsByTagName("TR")(0).innerText Set htmlDoc = Nothing Set httpReq = Nothing End Sub
■参考URL
エクセルVBAでIEを使わずにHTMLドキュメントを取得する方法
エクセルVBAでHTTPリクエストをする最も簡単なプログラム