vba网抓教程
vba网抓常用方法:1、xmlhttp/winhttp法:用xmlhttp/winhttp模拟向服务器发送请求,接收服务器返回的数据。优点:效率高,基本无兼容性问题。缺点:需要借助如fiddler的工具来模拟http请求。2、IE/webbrowser法:创建IE控件或webbrowser控件,结合htmlfile对象的方法和属性,模拟浏览器操作,获取浏览器页面的数据。优点:这个方法可以模拟大部分的浏览器操作。所见即所得,浏览器能看到的数据就能用代码获取。缺点:各种弹窗相当烦人,兼容性也确实是个很伤脑筋的问题。上传文件在IE里根本无法实现。(有实现方法?请一定告诉我)3、QueryTables法:因为它是excel自带,所以勉强也算是一种方法。其实此法和xmlhttp类似,也是GET或POST方式发送请求,然后得到服务器的response返回到单元格内。优点:excel自带,可以通过录制宏得到代码,处理table很方便。代码简短,适合快速获取一些存在于源代码的table里的数据。缺点:无法模拟referer等发包头(如果你有在QT中模拟referer的方法,请一定告诉我)网抓主题代码:Sub Main() Dim strText As String With CreateObject("MSXML2.XMLHTTP") 'CreateObject("WinHttp.WinHttpRequest.5.1")' .Open "POST", "", False .setRequestHeader "Content-Type", "application/x-www-form-urlencoded" .setRequestHeader "Referer", "" .Send strText = .responsetext Debug.Print strText End WithEnd Sub拷贝剪切板:Sub CopyToClipbox(strText As String) '文本拷贝到剪贴板 With CreateObject("new:1C3B4210-F441-11CE-B9EA-00AA006B1A69") .SetText strText .PutInClipboard End WithEnd SubDongYu作业1.rar(18.29 KB, 下载次数: 88)2014-10-21 17:05 上传下载次数: 88Sub HomerWork1_1()'新手:DongYu'作业:1、网站:http:/data.bank.hexun.com/lccp/jrxp.aspx' 操作:点击“今日在售产品”,获取今日在售产品第一页的数据。 Dim xml As New MSXML2.XMLHTTP, url As String, St As String Dim arr, brr, ar, i, c url = "http:/data.bank.hexun.com/lccp/Jrxp.aspx?col=1&tag=desc&date=2014-10-21&page=2" With xml .Open "GET", url, False .send St = .responseText End With St = Split(Split(St, "<div class=""mark"">")(1), "</div>")(0) arr = Split(St, "<tr align='center'>") ReDim brr(1 To UBound(arr), 1 To 9) For i = 1 To UBound(arr) ar = arr(i) brr(i, 1) = Split(Split(ar, "value='")(1), "'")(0) + Split(Split(ar, "<font class='cred'>")(1), "</font>")(0) brr(i, 2) = Split(Split(ar, "</font></td><td class='hl'>")(1), "</td>")(0) brr(i, 3) = Split(Split(ar, "<td class='on'>")(1), "</td>")(0) brr(i, 4) = Split(Split(ar, "<td class='hl'>")(1), "</td>")(0) brr(i, 5) = Split(Split(ar, "<td class='hl'>")(2), "</td>")(0) brr(i, 6) = Split(Split(ar, "<td class='hl'>")(3), "</td>")(0) brr(i, 7) = Split(Split(ar, "<td class='hl'>")(4), "</td>")(0) brr(i, 8) = Split(Split(ar, "<td class='hl'>")(5), "</td>")(0) brr(i, 9) = Split(Split(Split(ar, "<td class='hl'>")(5), "</td>")(1), ">")(1) Next i With ActiveSheet .Cells.Clear .Columns("D:E").NumberFormatLocal = "yyyy-m-d" .a1.Resize(1, 10) = "对比","产品名称","银行","起售日","停售日","币种","管理期(月)","产品类型","预期收益(%)","收益" .b2.Resize(UBound(brr, 1), 9) = brr End WithEnd SubSub 按钮2_单击() Dim url, html url = "http:/webflight.linkosky.com/WEB/Flight/FlightSearchResultDefault.aspx?JT=1" url = url & "&OC=PEK" '北京首都机场 url = url & "&DC=SHA" '上海虹口机场 url = url & "&dstDesp=GUANGZHOU%B9%E3%D6%DD" url = url & "&dst2=CAN" url = url & "&DD=2014-10-22" '查询日期 url = url & "&DT=7" url = url & "&BD=" url = url & "&BT=7" url = url & "&AL=ALL" '全部航空 url = url & "&DR=true" url = url & "&image.x=33" url = url & "&image.y=9" url = url & "&Sn=87bf24142bc0c78727610871f373e0a7" Set html = CreateObject("htmlfile") With CreateObject("msxml2.xmlhttp") .Open "get", url, False .send html.body.innerhtml = .responsetext Set tb = html.all.tags("div") For i = 0 To tb.Length - 1 If tb(i).classname = "menu_layout2" Or tb(i).classname = "listone_layout" Or tb(i).classname = "listtwo_layout" Or tb(i).classname = "menu_content_small2" Then n = n + 1 For j = 0 To tb(i).childnodes.Length - 1 Cells(n, j + 1) = tb(i).childnodes(j).innertext Next End If Next End WithEnd SubSub 作业1_2_获取航班信息数据()'网站:http:/www.caac.gov.cn/S1/GNCX/'操作:点击“查询”,获取航班信息数据。 Dim St As String, Url$, arr, brr, Crr Dim S1$, S2$, i%, j%, rng As Range Url = "http:/webflight.linkosky.com/WEB/Flight/FlightSearchResultDefault.aspx?JT=1&OC=PEK&DC=SHA&dstDesp=GUANGZHOU%B9%E3%D6%DD&dst2=CAN&DD=2014-10-22&DT=7&BD=&BT=7&AL=ALL&DR=true&image.x=37&image.y=9&Sn=87bf24142bc0c78727610871f373e0a7" With CreateObject("WinHttp.WinHttpRequest.5.1") .Open "GET", Url, False .Send St = .responsetext End With ' If InStr(St, "<div id=""FlightListFlight0"">") < 1 Then Cells(1, 1) = "抱歉!没有满足条件的航班,请重新输入查询条件! " Else St = Split(Split(St, "<div id=""FlightListFlight0"">")(1), "</div><br>")(0) With ActiveSheet Cells(1, 1) = Split(Split(St, "<strong>")(1), "</strong>")(0) arr = Spl