2018-02-16 GetSameTypeQuestion
- '目前存在的BUG
- '图片补丁存在多个URL
- Private Declare Function URLDownloadToFile Lib "urlmon" Alias "URLDownloadToFileA" (ByVal pCaller As Long, ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As Long) As Long
- Private Declare Function DeleteUrlCacheEntry Lib "wininet" Alias "DeleteUrlCacheEntryA" (ByVal lpszUrlName As String) As Long
- Sub DownloadImageName(ByVal ImageURL As String, ByVal ImagePath As String)
- Dim lngRetVal As Long
- lngRetVal = URLDownloadToFile(0, ImageURL, ImagePath, 0, 0)
- If lngRetVal = 0 Then
- DeleteUrlCacheEntry ImageURL '清除缓存
- 'MsgBox "成功"
- Else
- 'MsgBox "失败"
- End If
- End Sub
- Sub LoopGetSubject()
- Dim StartTime As Variant
- Dim UsedTime As Variant
- StartTime = VBA.Timer
- Dim Sht As Worksheet
- Set Sht = ThisWorkbook.ActiveSheet
- With Sht
- EndRow = .Cells(.Cells.Rows.Count, 1).End(xlUp).Row
- For i = 2 To EndRow
- SetFontRed .Cells(i, 1).Resize(1, 3)
- FindText = Mid(.Cells(i, 3).Text, 4, Len(.Cells(i, 3).Text) - 8)
- ExamUrl = .Cells(i, 2).Text
- Call GetExamTextByUrl(ExamUrl, FindText)
- Next i
- End With
- Set Sht = Nothing
- UsedTime = VBA.Timer - StartTime
- Debug.Print "UsedTime :" & Format(UsedTime, "#0.0000 Seconds")
- MsgBox "UsedTime :" & Format(UsedTime, "#0.0000 Seconds")
- End Sub
- Sub GetSubject()
- SetFontRed Application.ActiveCell
- FindText = Mid(Application.ActiveCell.Text, 4, Len(Application.ActiveCell.Text) - 8)
- ExamUrl = Application.ActiveCell.Offset(0, -1).Text
- Call GetExamTextByUrl(ExamUrl, FindText)
- End Sub
- Sub GetExamTextByUrl(ByVal ExamUrl As String, ByVal FindText As String)
- Dim Subject As String
- Dim Question As String
- Dim ImageURL As String
- Dim Answer As String
- Dim HasGetContent As Boolean
- Dim docName As String
- Dim docPath As String
- Dim Independent As Boolean
- Dim IsQuestion As Boolean
- Dim IsAnswer As Boolean
- Dim oneP As Object
- Dim nextTag As Object
- 'send request
- With CreateObject("MSXML2.XMLHTTP")
- .Open "GET", ExamUrl, False
- .Send
- WebText = .responsetext
- 'Debug.Print WebText
- End With
- With CreateObject("htmlfile")
- .write WebText
- Set examdiv = .getElementById("sina_keyword_ad_area2")
- '获取试卷文本内容
- ExamText = examdiv.innerText
- '判断试卷是否含有独立答案
- Independent = ExamText Like "*参考答案*"
- 'Debug.Print " Independent "; Independent
- '设定搜集题目Word文档名称和路径
- docName = Application.ActiveSheet.Name & "_题目搜集.doc"
- docPath = ThisWorkbook.Path & "\" & docName
- '判断某个段落是否为题目/答案的开始
- IsQuestion = False
- IsAnswer = False
- '判断是否已经提取到内容
- HasGetContent = False
- '循环所有段落
- For Each oneP In .getElementsByTagName("p")
- If HasGetContent = False Then
- '判断某段内容是否为题号行
- If oneP.innerText Like "##.*" Or oneP.innerText Like "##.*" Then
- Subject = ""
- Question = ""
- ImageURL = ""
- Answer = ""
- '开始记录题干内容
- Subject = oneP.innerText
- 'Debug.Print OneP.innerText
- Else
- If InStr(oneP.innerText, FindText) = 0 Then
- '过滤不相干的问题,仅保留符合条件的问题
- If Not RegTest(oneP.innerText, "([\((]\d[\))]).*") Then
- '继续记录问题内容
- Subject = Subject & oneP.innerText
- End If
- End If
- End If
- '提取题目图片的地址
- Set nextTag = oneP.NextSibling
- If Not nextTag Is Nothing Then
- If UCase(nextTag.tagName) = "A" Then
- If nextTag.HasChildNodes Then
- If nextTag.href Like "http://photo.blog.sina.com.cn/showpic.html*" Then
- ImageURL = ImageURL & "|" & nextTag.FirstChild.getAttribute("real_src")
- 'Debug.Print ImageURL
- End If
- End If
- End If
- End If
- '提取题目的序号和问题的序号
- If InStr(oneP.innerText, FindText) > 0 Then
- SubjectIndex = RegGet(Subject, "(\d{1,2})[..].*")
- Question = oneP.innerText
- questionIndex = RegGet(Question, "[\((](\d)[\))].*")
- 'Debug.Print "题序:"; SubjectIndex; " 问序: "; questionIndex
- HasGetContent = True
- End If
- Else
- '提取内容后 开始找答案
- '试卷不含独立答案,答案就附在每道题后面
- If Independent = False Then
- If IsAnswer = False Then
- If RegTest(oneP.innerText, "[\((](" & questionIndex & ")[\))].*") Then
- Answer = oneP.innerText
- IsAnswer = True
- 'Exit For
- End If
- Else
- Debug.Print oneP.innerText
- If RegTest(oneP.innerText, "[\((](\d)[\))].*") Or RegTest(oneP.innerText, "(\d{1,2})[..].*") Then
- Exit For
- Else
- Answer = Answer & oneP.innerText
- End If
- End If
- Else
- '试卷还有独立参考答案
- '判断某段内容的题号是否符合条件
- If RegTest(oneP.innerText, "(" & SubjectIndex & ")[\..].*") Then
- IsQuestion = True
- 'Debug.Print isQuestion
- End If
- If IsQuestion = True Then
- '判断某段内容的问题序号是否符合条件
- If IsAnswer = False Then
- If RegTest(oneP.innerText, "([\((]" & questionIndex & "[\))]).*") Then
- '记录问题答案
- Answer = oneP.innerText
- IsAnswer = True
- 'Exit For
- End If
- Else
- Debug.Print oneP.innerText
- If RegTest(oneP.innerText, "[\((](\d)[\))].*") Or RegTest(oneP.innerText, "(\d{1,2})[..].*") Then
- Exit For
- Else
- Answer = Answer & oneP.innerText
- End If
- End If
- End If
- End If
- End If
- Next oneP
- '图片地址处理
- ImageURL = Mid(ImageURL, 2)
- '测试
- Debug.Print Subject
- Debug.Print ImageURL
- Debug.Print Question
- Debug.Print Answer
- End With
- '【补丁,有待改进】2017年下半年部分图片提取不到的问题修正
- If Len(ImageURL) = 0 Then
- hasimagetext = Split(WebText, FindText)(0)
- hasimagetext = Split(hasimagetext, "real_src")(UBound(Split(hasimagetext, "real_src")))
- ImageURL = Split(hasimagetext, """")(1)
- End If
- '输出题目内容到Word文档
- Dim wdApp As Object
- Dim Doc As Object
- On Error Resume Next
- Set wdApp = GetObject(, "Word.Application")
- On Error GoTo 0
- If Not wdApp Is Nothing Then
- wdApp.Visible = True
- On Error Resume Next
- Set Doc = wdApp.documents(docName)
- On Error GoTo 0
- If Doc Is Nothing Then
- Set Doc = wdApp.documents.Add()
- Doc.SaveAs docPath
- End If
- Else
- Set wdApp = CreateObject("Word.Application")
- wdApp.Visible = True
- Set Doc = wdApp.documents.Add()
- Doc.SaveAs docPath
- End If
- Doc.Activate
- wdApp.Selection.EndKey 6
- wdApp.Selection.TypeParagraph
- wdApp.Selection.InsertBreak 7
- '输出题干内容
- wdApp.Selection.TypeText Text:=Subject
- wdApp.Selection.TypeParagraph
- '下载图片并插入WORD文档
- If ImageURL <> "" Then
- If InStr(ImageURL, "|") = 0 Then
- ImagePath = ThisWorkbook.Path & Application.PathSeparator & "tmp.jpg"
- DownloadImageName ImageURL, ImagePath
- wdApp.Selection.InlineShapes.AddPicture Filename:=ImagePath, LinkToFile:=False, SaveWithDocument:=True
- wdApp.Selection.TypeParagraph
- Kill ImagePath
- 'Stop
- Else
- ImageURLs = Split(ImageURL, "|")
- For n = LBound(ImageURLs) To UBound(ImageURLs) Step 1
- ImagePath = ThisWorkbook.Path & Application.PathSeparator & "tmp.jpg"
- DownloadImageName ImageURL, ImagePath
- wdApp.Selection.InlineShapes.AddPicture Filename:=ImagePath, LinkToFile:=False, SaveWithDocument:=True
- wdApp.Selection.TypeParagraph
- Kill ImagePath
- Next n
- End If
- End If
- '输出问题内容
- wdApp.Selection.TypeText Text:=Question
- wdApp.Selection.TypeParagraph
- '输出答案内容
- wdApp.Selection.TypeText Text:="【答案】" & Answer
- wdApp.Selection.TypeParagraph
- Set wdApp = Nothing
- Set Doc = Nothing
- Set oneP = Nothing
- End Sub
- Private Function RegTest(ByVal OrgText As String, ByVal Pattern As String) As Boolean
- '传递参数 :原字符串, 匹配模式
- Dim Regex As Object
- Set Regex = CreateObject("VBScript.RegExp")
- With Regex
- .Global = True
- .Pattern = Pattern
- End With
- RegTest = Regex.test(OrgText)
- Set Regex = Nothing
- End Function
- Public Function RegGet(ByVal OrgText As String, ByVal Pattern As String) As String
- '传递参数 :原字符串, 匹配模式
- Dim Regex As Object
- Dim Mh As Object
- Set Regex = CreateObject("VBScript.RegExp")
- With Regex
- .Global = True
- .Pattern = Pattern
- End With
- If Regex.test(OrgText) Then
- Set Mh = Regex.Execute(OrgText)
- RegGet = Mh.Item(0).submatches(0)
- Else
- RegGet = ""
- End If
- Set Regex = Nothing
- End Function
- Sub SetFontRed(ByVal Rng As Range)
- With Rng.Font
- .Color = -16776961
- .TintAndShade = 0
- End With
- End Sub
2018-02-16 GetSameTypeQuestion的更多相关文章
- 5820. 【NOIP提高A组模拟2018.8.16】 非法输入(模拟,字符串)
5820. [NOIP提高A组模拟2018.8.16] 非法输入 (File IO): input:aplusb.in output:aplusb.out Time Limits: 1000 ms ...
- 2018.11.16 浪在ACM 集训队第五次测试赛
2018.11.16 浪在ACM 集训队第五次测试赛 整理人:李继朋 Problem A : 参考博客:[1]朱远迪 Problem B : 参考博客: Problem C : 参考博客:[1]马鸿儒 ...
- 读书笔记-《Maven实战》-2018/4/16
第一章:Maven简介 1:Maven:Maven原本的单词意思为"知识的积累",谷歌翻译为"行家",而作为Apache的开源项目,Maven是一个主要服务于基 ...
- 2018.6.16 PHP小实验
PHP实验 实验一 <?php /** * Created by PhpStorm. * User: qichunlin * Date: 2018/5/17 * Time: 下午5:35 */ ...
- AtCoder Beginner Contest 100 2018/06/16
A - Happy Birthday! Time limit : 2sec / Memory limit : 1000MB Score: 100 points Problem Statement E8 ...
- 【开发工具】- Idea.2018.02注册码激活
1.从下面地址下载一个jar包,名称是 JetbrainsCrack-3.1-release-enc.jar 下载地址: 链接: https://pan.baidu.com/s/1VZjklI3qh ...
- 本周ASP.NET英文技术文章推荐[02/03 - 02/16]:MVC、Visual Studio 2008、安全性、性能、LINQ to JavaScript、jQuery...
摘要 继续坚持,继续推荐.本期共有9篇文章: 最新的ASP.NET MVC框架开发计划 Visual Studio 2008 Web开发相关的Hotfix发布 ASP.NET安全性教程系列 ASP.N ...
- h5视频和音频 -2018/04/16
HTML5 规定了一种通过 video 元素来包含视频的标准方法. 当前video元素支持的三种视频格式: (1)Ogg 带有Theora视频编码和Vorbis音频编码的ogg文件 (2)MPEG4带 ...
- 2018.02.12 noip模拟赛T2
二兵的赌注 Description游戏中,二兵要进入了一家奇怪的赌场.赌场中有n个庄家,每个庄家都可以猜大猜小,猜一次一元钱.每一次开彩前,你都可以到任意个庄家那里下赌注.如果开彩结果是大,你就可以得 ...
- 【资料下载区】【iCore4相关代码、资料下载地址】更新日期2018/02/24
[iCore4相关文档][更新中...] iCore4原理图(PDF)下载iCore4引脚注释(PDF)下载iCore4机械尺寸(PDF)下载 [iCore4相关例程代码][ARM] DEMO测试程序 ...
随机推荐
- noip模拟【array】
array by ysy [题目描述] 给定一个长度为n的数列,每次你可以进行以下操作之一: (1)将一个数+a: (2)将一个数-a: (3)将一个数+b: (4)将一个数-b: 你需要将所有数全部 ...
- EXCEL 基本函数
案例2:修改非法日期 TODAY(),显示今天日期,数据格式是日期,如果是常规,就是数字. EXCEL 起始日期,1900/1/1是第一天 日期输入方式要正确 时间数据格式 1:00:00 = 1 ...
- POJ-1038 Bugs Integrated, Inc. (状压+滚动数组+深搜 的动态规划)
本题的题眼很明显,N (1 <= N <= 150), M (1 <= M <= 10),摆明了是想让你用状态压缩dp. 整个思路如下:由于要填2*3或者3*2的芯片,那么就要 ...
- Java后台要看的书
推荐一个 搜书的网站,挺好用的 鸠摩搜书 Java基础 <Head first Java> (入门用) <Java 编程思想> <Java核心技术卷> 并发 < ...
- CF1137C Museums Tour
思路 强连通分量的好题 对于每个博物馆,因为时间的限制条件,不好直接统计, 发现d很小,可以建出d层分层图,原图<u,v>的边变成<u,i>到<v,i+1>的边,& ...
- C++类的大小——sizeof(class)
第一:空类的大小 class CBase { }; 运行cout<<"sizeof(CBase)="<<sizeof(CBase)<<endl; ...
- Graphics for R
https://cran.r-project.org/web/views/Graphics.html CRAN Task View: Graphic Displays & Dynamic Gr ...
- SQLServer2008 远程过程调用失败
今天在连接数据库的时候,发现无法获取到服务器名称,打开SQLServer Configuration Manager,发现SQLServer服务中远程过程调用失败 我装的是VS2017,在网上百度了一 ...
- KMP字符串匹配(模板)
描述: 给出两个字符串 s1 和 s2 ,其中 s2 为 s1 的子串,求出 s2 在 s1 中所有出现的位置.同时要求输出 s2 的 fail 数组. 思路: KMP模板. 标程: #include ...
- CentOS6.X、7.X下Jenkins的安装及使用
一.相关概念 1.1 Jenkins概念: Jenkins是一个功能强大的应用程序,允许持续集成和持续交付项目,无论用的是什么平台.这是一个免费的源代码,可以处理任何类型的构建或持续集成.集成Jenk ...