asp数据采集数据采集程序
' On Error Resume Next Server.Scripttimeout = 300 ' --------------------------------------------------------------------- ' 采集数据 Function getHTTPData(url) dim http set http = Server.createobject( " Msxml2.XMLHTTP " ) if instr (url, " http:// " ) = 0 then url = " http:// " & url Http.open " GET " ,url, false Http.send() if Http.Status <> 200 then exit function getHTTPData = bytesToBSTR(Http.responseBody, " UTF-8 " ) set http = nothing if err.number <> 0 then err.Clear sCharset = "" End function ' --------------------------------------------------------------------- Function BytesToBstr(body,Cset) dim objstream set objstream = Server.CreateObject( " adodb.stream " ) objstream.Type = 1 objstream.Mode = 3 objstream.Open objstream.Write body objstream.Position = 0 objstream.Type = 2 objstream.Charset = Cset BytesToBstr = objstream.ReadText objstream.Close set objstream = nothing End Function ' --------------------------------------------------------------------- ' 服务器登录 Function login(url) dim http set http = Server.createobject( " Msxml2.XMLHTTP " ) if instr (url, " http:// " ) = 0 then url = " http:// " & url Http.open " GET " ,url, false Http.send() if Http.Status <> 200 then exit function set http = nothing if err.number <> 0 then err.Clear End function ' --------------------------------------------------------------------- ' 正则替换 Function ReplaceText(fString,patrn, replStr) Set regEx = New RegExp regEx.Pattern = patrn regEx.IgnoreCase = True regEx.Global = True ReplaceText = regEx.Replace(fString, replStr) End Function ' --------------------------------------------------------------------- ' 去标签 包括内容 Function ReplaceTag(str, tag) Set regEx = New RegExp regEx.Pattern = " < " & tag & " [^>]*?>.*?<\/ " & tag & " > " regEx.IgnoreCase = True regEx.Global = True ReplaceTag = regEx.Replace(str, "" ) End Function ' --------------------------------------------------------------------- ' 去标签 不包括内容 Function ReplaceTab(str, tag) Set regEx = New RegExp regEx.Pattern = " <\/? " & tag & " [^>]*> " regEx.IgnoreCase = True regEx.Global = True ReplaceTab = regEx.Replace(str, "" ) End Function ' --------------------------------------------------------------------- ' 去标签属性 保留标签 Function ReplaceinnerTag(str, tag) Set regEx = New RegExp regEx.Pattern = " (<\/? " & tag & " )[^>]*> " regEx.IgnoreCase = True regEx.Global = True ReplaceinnerTag = regEx.Replace(str, " $1> " ) End Function ' --------------------------------------------------------------------- ' 按正则取数据 Function getText(fString, patrn,n) dim Matches, tStr tStr = fString Set re = New Regexp re.IgnoreCase = True re.Global = True re.Pattern = patrn set Matches = re.Execute(tStr) set re = nothing rStr = "" For Each Match in Matches rStr = Match.SubMatches(n) exit for Next getText = rStr End Function ' --------------------------------------------------------------------- ' 数据过滤 Function Encode_text(str) If Isnull (str) Then Encode_text = "" Exit Function End If str = ReplaceText(str, " <\/?br[^>]*> " , vbCrlf ) str = ReplaceText(str, " <\/?p[^>]*> " , vbCrlf ) str = ReplaceTab(str, " [a-zA-Z] " ) str = ReplaceText(str, " \n\s*\r " , Chr ( 10 ) & Chr ( 13 )) str = Replace (str, " & " , " & " ) str = Replace (str, " ; " , " ; " ) str = Replace (str, " & " , " & " ) str = Replace (str, Chr ( 34 ), " " " ) str = Replace (str, " ' " , " ' " ) str = Replace (str, " < " , " < " ) str = Replace (str, " > " , " > " ) str = Replace (str, " ( " , " ( " ) str = Replace (str, " ) " , " ) " ) str = Replace (str, " * " , " * " ) str = Replace (str, " % " , " % " ) str = Replace (str,vbCrlf, " <br/> " ) Encode_text = str End Function ' --------------------------------------------------------------------- ' 通过Matches取数据 dim Matches sub setMatches(str,sRe) Set re = New Regexp re.IgnoreCase = True re.Global = True re.Pattern = sRe set Matches = re.Execute(str) set re = nothing end sub ' ---------------------------------------------------------------------例子
' 例子 call setMatches(textcontent, re) For Each Match in Matches response.write Match.value Next转载于:https://www.cnblogs.com/cloudgamer/articles/1036383.html
相关资源:数据采集源码web版asp.net