1. '目前存在的BUG
  2. '图片补丁存在多个URL
  3. Private Declare Function URLDownloadToFile Lib "urlmon" Alias "URLDownloadToFileA" (ByVal pCaller As Long, ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As Long) As Long
  4. Private Declare Function DeleteUrlCacheEntry Lib "wininet" Alias "DeleteUrlCacheEntryA" (ByVal lpszUrlName As String) As Long
  5. Sub DownloadImageName(ByVal ImageURL As String, ByVal ImagePath As String)
  6. Dim lngRetVal As Long
  7. lngRetVal = URLDownloadToFile(0, ImageURL, ImagePath, 0, 0)
  8. If lngRetVal = 0 Then
  9. DeleteUrlCacheEntry ImageURL '清除缓存
  10. 'MsgBox "成功"
  11. Else
  12. 'MsgBox "失败"
  13. End If
  14. End Sub
  15. Sub LoopGetSubject()
  16. Dim StartTime As Variant
  17. Dim UsedTime As Variant
  18. StartTime = VBA.Timer
  19. Dim Sht As Worksheet
  20. Set Sht = ThisWorkbook.ActiveSheet
  21. With Sht
  22. EndRow = .Cells(.Cells.Rows.Count, 1).End(xlUp).Row
  23. For i = 2 To EndRow
  24. SetFontRed .Cells(i, 1).Resize(1, 3)
  25. FindText = Mid(.Cells(i, 3).Text, 4, Len(.Cells(i, 3).Text) - 8)
  26. ExamUrl = .Cells(i, 2).Text
  27. Call GetExamTextByUrl(ExamUrl, FindText)
  28. Next i
  29. End With
  30. Set Sht = Nothing
  31. UsedTime = VBA.Timer - StartTime
  32. Debug.Print "UsedTime :" & Format(UsedTime, "#0.0000 Seconds")
  33. MsgBox "UsedTime :" & Format(UsedTime, "#0.0000 Seconds")
  34. End Sub
  35. Sub GetSubject()
  36. SetFontRed Application.ActiveCell
  37. FindText = Mid(Application.ActiveCell.Text, 4, Len(Application.ActiveCell.Text) - 8)
  38. ExamUrl = Application.ActiveCell.Offset(0, -1).Text
  39. Call GetExamTextByUrl(ExamUrl, FindText)
  40. End Sub
  41. Sub GetExamTextByUrl(ByVal ExamUrl As String, ByVal FindText As String)
  42. Dim Subject As String
  43. Dim Question As String
  44. Dim ImageURL As String
  45. Dim Answer As String
  46. Dim HasGetContent As Boolean
  47. Dim docName As String
  48. Dim docPath As String
  49. Dim Independent As Boolean
  50. Dim IsQuestion As Boolean
  51. Dim IsAnswer As Boolean
  52. Dim oneP As Object
  53. Dim nextTag As Object
  54.  
  55. 'send request
  56. With CreateObject("MSXML2.XMLHTTP")
  57. .Open "GET", ExamUrl, False
  58. .Send
  59. WebText = .responsetext
  60. 'Debug.Print WebText
  61. End With
  62. With CreateObject("htmlfile")
  63. .write WebText
  64. Set examdiv = .getElementById("sina_keyword_ad_area2")
  65. '获取试卷文本内容
  66. ExamText = examdiv.innerText
  67. '判断试卷是否含有独立答案
  68. Independent = ExamText Like "*参考答案*"
  69. 'Debug.Print " Independent "; Independent
  70. '设定搜集题目Word文档名称和路径
  71. docName = Application.ActiveSheet.Name & "_题目搜集.doc"
  72. docPath = ThisWorkbook.Path & "\" & docName
  73. '判断某个段落是否为题目/答案的开始
  74. IsQuestion = False
  75. IsAnswer = False
  76. '判断是否已经提取到内容
  77. HasGetContent = False
  78. '循环所有段落
  79. For Each oneP In .getElementsByTagName("p")
  80. If HasGetContent = False Then
  81. '判断某段内容是否为题号行
  82. If oneP.innerText Like "##.*" Or oneP.innerText Like "##.*" Then
  83. Subject = ""
  84. Question = ""
  85. ImageURL = ""
  86. Answer = ""
  87. '开始记录题干内容
  88. Subject = oneP.innerText
  89. 'Debug.Print OneP.innerText
  90. Else
  91. If InStr(oneP.innerText, FindText) = 0 Then
  92. '过滤不相干的问题,仅保留符合条件的问题
  93. If Not RegTest(oneP.innerText, "([\((]\d[\))]).*") Then
  94. '继续记录问题内容
  95. Subject = Subject & oneP.innerText
  96. End If
  97. End If
  98. End If
  99. '提取题目图片的地址
  100. Set nextTag = oneP.NextSibling
  101. If Not nextTag Is Nothing Then
  102. If UCase(nextTag.tagName) = "A" Then
  103. If nextTag.HasChildNodes Then
  104. If nextTag.href Like "http://photo.blog.sina.com.cn/showpic.html*" Then
  105. ImageURL = ImageURL & "|" & nextTag.FirstChild.getAttribute("real_src")
  106. 'Debug.Print ImageURL
  107. End If
  108. End If
  109. End If
  110. End If
  111.  
  112. '提取题目的序号和问题的序号
  113. If InStr(oneP.innerText, FindText) > 0 Then
  114. SubjectIndex = RegGet(Subject, "(\d{1,2})[..].*")
  115. Question = oneP.innerText
  116. questionIndex = RegGet(Question, "[\((](\d)[\))].*")
  117. 'Debug.Print "题序:"; SubjectIndex; " 问序: "; questionIndex
  118. HasGetContent = True
  119. End If
  120.  
  121. Else
  122. '提取内容后 开始找答案
  123. '试卷不含独立答案,答案就附在每道题后面
  124. If Independent = False Then
  125.  
  126. If IsAnswer = False Then
  127. If RegTest(oneP.innerText, "[\((](" & questionIndex & ")[\))].*") Then
  128. Answer = oneP.innerText
  129. IsAnswer = True
  130. 'Exit For
  131. End If
  132. Else
  133. Debug.Print oneP.innerText
  134. If RegTest(oneP.innerText, "[\((](\d)[\))].*") Or RegTest(oneP.innerText, "(\d{1,2})[..].*") Then
  135. Exit For
  136. Else
  137. Answer = Answer & oneP.innerText
  138. End If
  139. End If
  140.  
  141. Else
  142. '试卷还有独立参考答案
  143. '判断某段内容的题号是否符合条件
  144. If RegTest(oneP.innerText, "(" & SubjectIndex & ")[\..].*") Then
  145. IsQuestion = True
  146. 'Debug.Print isQuestion
  147. End If
  148. If IsQuestion = True Then
  149. '判断某段内容的问题序号是否符合条件
  150. If IsAnswer = False Then
  151. If RegTest(oneP.innerText, "([\((]" & questionIndex & "[\))]).*") Then
  152. '记录问题答案
  153. Answer = oneP.innerText
  154. IsAnswer = True
  155. 'Exit For
  156. End If
  157. Else
  158. Debug.Print oneP.innerText
  159. If RegTest(oneP.innerText, "[\((](\d)[\))].*") Or RegTest(oneP.innerText, "(\d{1,2})[..].*") Then
  160. Exit For
  161. Else
  162. Answer = Answer & oneP.innerText
  163. End If
  164. End If
  165. End If
  166. End If
  167. End If
  168. Next oneP
  169. '图片地址处理
  170. ImageURL = Mid(ImageURL, 2)
  171. '测试
  172. Debug.Print Subject
  173. Debug.Print ImageURL
  174. Debug.Print Question
  175. Debug.Print Answer
  176. End With
  177.  
  178. '【补丁,有待改进】2017年下半年部分图片提取不到的问题修正
  179. If Len(ImageURL) = 0 Then
  180. hasimagetext = Split(WebText, FindText)(0)
  181. hasimagetext = Split(hasimagetext, "real_src")(UBound(Split(hasimagetext, "real_src")))
  182. ImageURL = Split(hasimagetext, """")(1)
  183. End If
  184.  
  185. '输出题目内容到Word文档
  186. Dim wdApp As Object
  187. Dim Doc As Object
  188.  
  189. On Error Resume Next
  190. Set wdApp = GetObject(, "Word.Application")
  191. On Error GoTo 0
  192. If Not wdApp Is Nothing Then
  193. wdApp.Visible = True
  194. On Error Resume Next
  195. Set Doc = wdApp.documents(docName)
  196. On Error GoTo 0
  197. If Doc Is Nothing Then
  198. Set Doc = wdApp.documents.Add()
  199. Doc.SaveAs docPath
  200. End If
  201. Else
  202. Set wdApp = CreateObject("Word.Application")
  203. wdApp.Visible = True
  204. Set Doc = wdApp.documents.Add()
  205. Doc.SaveAs docPath
  206. End If
  207.  
  208. Doc.Activate
  209. wdApp.Selection.EndKey 6
  210. wdApp.Selection.TypeParagraph
  211. wdApp.Selection.InsertBreak 7
  212. '输出题干内容
  213. wdApp.Selection.TypeText Text:=Subject
  214. wdApp.Selection.TypeParagraph
  215.  
  216. '下载图片并插入WORD文档
  217. If ImageURL <> "" Then
  218. If InStr(ImageURL, "|") = 0 Then
  219. ImagePath = ThisWorkbook.Path & Application.PathSeparator & "tmp.jpg"
  220. DownloadImageName ImageURL, ImagePath
  221. wdApp.Selection.InlineShapes.AddPicture Filename:=ImagePath, LinkToFile:=False, SaveWithDocument:=True
  222. wdApp.Selection.TypeParagraph
  223. Kill ImagePath
  224. 'Stop
  225. Else
  226. ImageURLs = Split(ImageURL, "|")
  227. For n = LBound(ImageURLs) To UBound(ImageURLs) Step 1
  228. ImagePath = ThisWorkbook.Path & Application.PathSeparator & "tmp.jpg"
  229. DownloadImageName ImageURL, ImagePath
  230. wdApp.Selection.InlineShapes.AddPicture Filename:=ImagePath, LinkToFile:=False, SaveWithDocument:=True
  231. wdApp.Selection.TypeParagraph
  232. Kill ImagePath
  233. Next n
  234. End If
  235. End If
  236. '输出问题内容
  237. wdApp.Selection.TypeText Text:=Question
  238. wdApp.Selection.TypeParagraph
  239. '输出答案内容
  240. wdApp.Selection.TypeText Text:="【答案】" & Answer
  241. wdApp.Selection.TypeParagraph
  242. Set wdApp = Nothing
  243. Set Doc = Nothing
  244. Set oneP = Nothing
  245. End Sub
  246. Private Function RegTest(ByVal OrgText As String, ByVal Pattern As String) As Boolean
  247. '传递参数 :原字符串, 匹配模式
  248. Dim Regex As Object
  249. Set Regex = CreateObject("VBScript.RegExp")
  250. With Regex
  251. .Global = True
  252. .Pattern = Pattern
  253. End With
  254. RegTest = Regex.test(OrgText)
  255. Set Regex = Nothing
  256. End Function
  257. Public Function RegGet(ByVal OrgText As String, ByVal Pattern As String) As String
  258. '传递参数 :原字符串, 匹配模式
  259. Dim Regex As Object
  260. Dim Mh As Object
  261. Set Regex = CreateObject("VBScript.RegExp")
  262. With Regex
  263. .Global = True
  264. .Pattern = Pattern
  265. End With
  266. If Regex.test(OrgText) Then
  267. Set Mh = Regex.Execute(OrgText)
  268. RegGet = Mh.Item(0).submatches(0)
  269. Else
  270. RegGet = ""
  271. End If
  272. Set Regex = Nothing
  273. End Function
  274. Sub SetFontRed(ByVal Rng As Range)
  275. With Rng.Font
  276. .Color = -16776961
  277. .TintAndShade = 0
  278. End With
  279. End Sub

  

2018-02-16 GetSameTypeQuestion的更多相关文章

  1. 5820. 【NOIP提高A组模拟2018.8.16】 非法输入(模拟,字符串)

    5820. [NOIP提高A组模拟2018.8.16] 非法输入 (File IO): input:aplusb.in output:aplusb.out Time Limits: 1000 ms   ...

  2. 2018.11.16 浪在ACM 集训队第五次测试赛

    2018.11.16 浪在ACM 集训队第五次测试赛 整理人:李继朋 Problem A : 参考博客:[1]朱远迪 Problem B : 参考博客: Problem C : 参考博客:[1]马鸿儒 ...

  3. 读书笔记-《Maven实战》-2018/4/16

    第一章:Maven简介 1:Maven:Maven原本的单词意思为"知识的积累",谷歌翻译为"行家",而作为Apache的开源项目,Maven是一个主要服务于基 ...

  4. 2018.6.16 PHP小实验

    PHP实验 实验一 <?php /** * Created by PhpStorm. * User: qichunlin * Date: 2018/5/17 * Time: 下午5:35 */ ...

  5. AtCoder Beginner Contest 100 2018/06/16

    A - Happy Birthday! Time limit : 2sec / Memory limit : 1000MB Score: 100 points Problem Statement E8 ...

  6. 【开发工具】- Idea.2018.02注册码激活

    1.从下面地址下载一个jar包,名称是  JetbrainsCrack-3.1-release-enc.jar 下载地址: 链接: https://pan.baidu.com/s/1VZjklI3qh ...

  7. 本周ASP.NET英文技术文章推荐[02/03 - 02/16]:MVC、Visual Studio 2008、安全性、性能、LINQ to JavaScript、jQuery...

    摘要 继续坚持,继续推荐.本期共有9篇文章: 最新的ASP.NET MVC框架开发计划 Visual Studio 2008 Web开发相关的Hotfix发布 ASP.NET安全性教程系列 ASP.N ...

  8. h5视频和音频 -2018/04/16

    HTML5 规定了一种通过 video 元素来包含视频的标准方法. 当前video元素支持的三种视频格式: (1)Ogg 带有Theora视频编码和Vorbis音频编码的ogg文件 (2)MPEG4带 ...

  9. 2018.02.12 noip模拟赛T2

    二兵的赌注 Description游戏中,二兵要进入了一家奇怪的赌场.赌场中有n个庄家,每个庄家都可以猜大猜小,猜一次一元钱.每一次开彩前,你都可以到任意个庄家那里下赌注.如果开彩结果是大,你就可以得 ...

  10. 【资料下载区】【iCore4相关代码、资料下载地址】更新日期2018/02/24

    [iCore4相关文档][更新中...] iCore4原理图(PDF)下载iCore4引脚注释(PDF)下载iCore4机械尺寸(PDF)下载 [iCore4相关例程代码][ARM] DEMO测试程序 ...

随机推荐

  1. noip模拟【array】

    array by ysy [题目描述] 给定一个长度为n的数列,每次你可以进行以下操作之一: (1)将一个数+a: (2)将一个数-a: (3)将一个数+b: (4)将一个数-b: 你需要将所有数全部 ...

  2. EXCEL 基本函数

    案例2:修改非法日期 TODAY(),显示今天日期,数据格式是日期,如果是常规,就是数字. EXCEL 起始日期,1900/1/1是第一天 日期输入方式要正确 时间数据格式  1:00:00  = 1 ...

  3. POJ-1038 Bugs Integrated, Inc. (状压+滚动数组+深搜 的动态规划)

    本题的题眼很明显,N (1 <= N <= 150), M (1 <= M <= 10),摆明了是想让你用状态压缩dp. 整个思路如下:由于要填2*3或者3*2的芯片,那么就要 ...

  4. Java后台要看的书

    推荐一个 搜书的网站,挺好用的 鸠摩搜书 Java基础 <Head first Java> (入门用) <Java 编程思想> <Java核心技术卷> 并发 < ...

  5. CF1137C Museums Tour

    思路 强连通分量的好题 对于每个博物馆,因为时间的限制条件,不好直接统计, 发现d很小,可以建出d层分层图,原图<u,v>的边变成<u,i>到<v,i+1>的边,& ...

  6. C++类的大小——sizeof(class)

    第一:空类的大小 class CBase { }; 运行cout<<"sizeof(CBase)="<<sizeof(CBase)<<endl; ...

  7. Graphics for R

    https://cran.r-project.org/web/views/Graphics.html CRAN Task View: Graphic Displays & Dynamic Gr ...

  8. SQLServer2008 远程过程调用失败

    今天在连接数据库的时候,发现无法获取到服务器名称,打开SQLServer Configuration Manager,发现SQLServer服务中远程过程调用失败 我装的是VS2017,在网上百度了一 ...

  9. KMP字符串匹配(模板)

    描述: 给出两个字符串 s1 和 s2 ,其中 s2 为 s1 的子串,求出 s2 在 s1 中所有出现的位置.同时要求输出 s2 的 fail 数组. 思路: KMP模板. 标程: #include ...

  10. CentOS6.X、7.X下Jenkins的安装及使用

    一.相关概念 1.1 Jenkins概念: Jenkins是一个功能强大的应用程序,允许持续集成和持续交付项目,无论用的是什么平台.这是一个免费的源代码,可以处理任何类型的构建或持续集成.集成Jenk ...