'目前存在的BUG
'图片补丁存在多个URL
Private Declare Function URLDownloadToFile Lib "urlmon" Alias "URLDownloadToFileA" (ByVal pCaller As Long, ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As Long) As Long
Private Declare Function DeleteUrlCacheEntry Lib "wininet" Alias "DeleteUrlCacheEntryA" (ByVal lpszUrlName As String) As Long
Sub DownloadImageName(ByVal ImageURL As String, ByVal ImagePath As String)
Dim lngRetVal As Long
lngRetVal = URLDownloadToFile(0, ImageURL, ImagePath, 0, 0)
If lngRetVal = 0 Then
DeleteUrlCacheEntry ImageURL '清除缓存
'MsgBox "成功"
Else
'MsgBox "失败"
End If
End Sub
Sub LoopGetSubject()
Dim StartTime As Variant
Dim UsedTime As Variant
StartTime = VBA.Timer
Dim Sht As Worksheet
Set Sht = ThisWorkbook.ActiveSheet
With Sht
EndRow = .Cells(.Cells.Rows.Count, 1).End(xlUp).Row
For i = 2 To EndRow
SetFontRed .Cells(i, 1).Resize(1, 3)
FindText = Mid(.Cells(i, 3).Text, 4, Len(.Cells(i, 3).Text) - 8)
ExamUrl = .Cells(i, 2).Text
Call GetExamTextByUrl(ExamUrl, FindText)
Next i
End With
Set Sht = Nothing
UsedTime = VBA.Timer - StartTime
Debug.Print "UsedTime :" & Format(UsedTime, "#0.0000 Seconds")
MsgBox "UsedTime :" & Format(UsedTime, "#0.0000 Seconds")
End Sub
Sub GetSubject()
SetFontRed Application.ActiveCell
FindText = Mid(Application.ActiveCell.Text, 4, Len(Application.ActiveCell.Text) - 8)
ExamUrl = Application.ActiveCell.Offset(0, -1).Text
Call GetExamTextByUrl(ExamUrl, FindText)
End Sub
Sub GetExamTextByUrl(ByVal ExamUrl As String, ByVal FindText As String)
Dim Subject As String
Dim Question As String
Dim ImageURL As String
Dim Answer As String
Dim HasGetContent As Boolean
Dim docName As String
Dim docPath As String
Dim Independent As Boolean
Dim IsQuestion As Boolean
Dim IsAnswer As Boolean
Dim oneP As Object
Dim nextTag As Object 'send request
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", ExamUrl, False
.Send
WebText = .responsetext
'Debug.Print WebText
End With
With CreateObject("htmlfile")
.write WebText
Set examdiv = .getElementById("sina_keyword_ad_area2")
'获取试卷文本内容
ExamText = examdiv.innerText
'判断试卷是否含有独立答案
Independent = ExamText Like "*参考答案*"
'Debug.Print " Independent "; Independent
'设定搜集题目Word文档名称和路径
docName = Application.ActiveSheet.Name & "_题目搜集.doc"
docPath = ThisWorkbook.Path & "\" & docName
'判断某个段落是否为题目/答案的开始
IsQuestion = False
IsAnswer = False
'判断是否已经提取到内容
HasGetContent = False
'循环所有段落
For Each oneP In .getElementsByTagName("p")
If HasGetContent = False Then
'判断某段内容是否为题号行
If oneP.innerText Like "##.*" Or oneP.innerText Like "##.*" Then
Subject = ""
Question = ""
ImageURL = ""
Answer = ""
'开始记录题干内容
Subject = oneP.innerText
'Debug.Print OneP.innerText
Else
If InStr(oneP.innerText, FindText) = 0 Then
'过滤不相干的问题,仅保留符合条件的问题
If Not RegTest(oneP.innerText, "([\((]\d[\))]).*") Then
'继续记录问题内容
Subject = Subject & oneP.innerText
End If
End If
End If
'提取题目图片的地址
Set nextTag = oneP.NextSibling
If Not nextTag Is Nothing Then
If UCase(nextTag.tagName) = "A" Then
If nextTag.HasChildNodes Then
If nextTag.href Like "http://photo.blog.sina.com.cn/showpic.html*" Then
ImageURL = ImageURL & "|" & nextTag.FirstChild.getAttribute("real_src")
'Debug.Print ImageURL
End If
End If
End If
End If '提取题目的序号和问题的序号
If InStr(oneP.innerText, FindText) > 0 Then
SubjectIndex = RegGet(Subject, "(\d{1,2})[..].*")
Question = oneP.innerText
questionIndex = RegGet(Question, "[\((](\d)[\))].*")
'Debug.Print "题序:"; SubjectIndex; " 问序: "; questionIndex
HasGetContent = True
End If Else
'提取内容后 开始找答案
'试卷不含独立答案,答案就附在每道题后面
If Independent = False Then If IsAnswer = False Then
If RegTest(oneP.innerText, "[\((](" & questionIndex & ")[\))].*") Then
Answer = oneP.innerText
IsAnswer = True
'Exit For
End If
Else
Debug.Print oneP.innerText
If RegTest(oneP.innerText, "[\((](\d)[\))].*") Or RegTest(oneP.innerText, "(\d{1,2})[..].*") Then
Exit For
Else
Answer = Answer & oneP.innerText
End If
End If Else
'试卷还有独立参考答案
'判断某段内容的题号是否符合条件
If RegTest(oneP.innerText, "(" & SubjectIndex & ")[\..].*") Then
IsQuestion = True
'Debug.Print isQuestion
End If
If IsQuestion = True Then
'判断某段内容的问题序号是否符合条件
If IsAnswer = False Then
If RegTest(oneP.innerText, "([\((]" & questionIndex & "[\))]).*") Then
'记录问题答案
Answer = oneP.innerText
IsAnswer = True
'Exit For
End If
Else
Debug.Print oneP.innerText
If RegTest(oneP.innerText, "[\((](\d)[\))].*") Or RegTest(oneP.innerText, "(\d{1,2})[..].*") Then
Exit For
Else
Answer = Answer & oneP.innerText
End If
End If
End If
End If
End If
Next oneP
'图片地址处理
ImageURL = Mid(ImageURL, 2)
'测试
Debug.Print Subject
Debug.Print ImageURL
Debug.Print Question
Debug.Print Answer
End With '【补丁,有待改进】2017年下半年部分图片提取不到的问题修正
If Len(ImageURL) = 0 Then
hasimagetext = Split(WebText, FindText)(0)
hasimagetext = Split(hasimagetext, "real_src")(UBound(Split(hasimagetext, "real_src")))
ImageURL = Split(hasimagetext, """")(1)
End If '输出题目内容到Word文档
Dim wdApp As Object
Dim Doc As Object On Error Resume Next
Set wdApp = GetObject(, "Word.Application")
On Error GoTo 0
If Not wdApp Is Nothing Then
wdApp.Visible = True
On Error Resume Next
Set Doc = wdApp.documents(docName)
On Error GoTo 0
If Doc Is Nothing Then
Set Doc = wdApp.documents.Add()
Doc.SaveAs docPath
End If
Else
Set wdApp = CreateObject("Word.Application")
wdApp.Visible = True
Set Doc = wdApp.documents.Add()
Doc.SaveAs docPath
End If Doc.Activate
wdApp.Selection.EndKey 6
wdApp.Selection.TypeParagraph
wdApp.Selection.InsertBreak 7
'输出题干内容
wdApp.Selection.TypeText Text:=Subject
wdApp.Selection.TypeParagraph '下载图片并插入WORD文档
If ImageURL <> "" Then
If InStr(ImageURL, "|") = 0 Then
ImagePath = ThisWorkbook.Path & Application.PathSeparator & "tmp.jpg"
DownloadImageName ImageURL, ImagePath
wdApp.Selection.InlineShapes.AddPicture Filename:=ImagePath, LinkToFile:=False, SaveWithDocument:=True
wdApp.Selection.TypeParagraph
Kill ImagePath
'Stop
Else
ImageURLs = Split(ImageURL, "|")
For n = LBound(ImageURLs) To UBound(ImageURLs) Step 1
ImagePath = ThisWorkbook.Path & Application.PathSeparator & "tmp.jpg"
DownloadImageName ImageURL, ImagePath
wdApp.Selection.InlineShapes.AddPicture Filename:=ImagePath, LinkToFile:=False, SaveWithDocument:=True
wdApp.Selection.TypeParagraph
Kill ImagePath
Next n
End If
End If
'输出问题内容
wdApp.Selection.TypeText Text:=Question
wdApp.Selection.TypeParagraph
'输出答案内容
wdApp.Selection.TypeText Text:="【答案】" & Answer
wdApp.Selection.TypeParagraph
Set wdApp = Nothing
Set Doc = Nothing
Set oneP = Nothing
End Sub
Private Function RegTest(ByVal OrgText As String, ByVal Pattern As String) As Boolean
'传递参数 :原字符串, 匹配模式
Dim Regex As Object
Set Regex = CreateObject("VBScript.RegExp")
With Regex
.Global = True
.Pattern = Pattern
End With
RegTest = Regex.test(OrgText)
Set Regex = Nothing
End Function
Public Function RegGet(ByVal OrgText As String, ByVal Pattern As String) As String
'传递参数 :原字符串, 匹配模式
Dim Regex As Object
Dim Mh As Object
Set Regex = CreateObject("VBScript.RegExp")
With Regex
.Global = True
.Pattern = Pattern
End With
If Regex.test(OrgText) Then
Set Mh = Regex.Execute(OrgText)
RegGet = Mh.Item(0).submatches(0)
Else
RegGet = ""
End If
Set Regex = Nothing
End Function
Sub SetFontRed(ByVal Rng As Range)
With Rng.Font
.Color = -16776961
.TintAndShade = 0
End With
End Sub

  

2018-02-16 GetSameTypeQuestion的更多相关文章

  1. 5820. 【NOIP提高A组模拟2018.8.16】 非法输入(模拟,字符串)

    5820. [NOIP提高A组模拟2018.8.16] 非法输入 (File IO): input:aplusb.in output:aplusb.out Time Limits: 1000 ms   ...

  2. 2018.11.16 浪在ACM 集训队第五次测试赛

    2018.11.16 浪在ACM 集训队第五次测试赛 整理人:李继朋 Problem A : 参考博客:[1]朱远迪 Problem B : 参考博客: Problem C : 参考博客:[1]马鸿儒 ...

  3. 读书笔记-《Maven实战》-2018/4/16

    第一章:Maven简介 1:Maven:Maven原本的单词意思为"知识的积累",谷歌翻译为"行家",而作为Apache的开源项目,Maven是一个主要服务于基 ...

  4. 2018.6.16 PHP小实验

    PHP实验 实验一 <?php /** * Created by PhpStorm. * User: qichunlin * Date: 2018/5/17 * Time: 下午5:35 */ ...

  5. AtCoder Beginner Contest 100 2018/06/16

    A - Happy Birthday! Time limit : 2sec / Memory limit : 1000MB Score: 100 points Problem Statement E8 ...

  6. 【开发工具】- Idea.2018.02注册码激活

    1.从下面地址下载一个jar包,名称是  JetbrainsCrack-3.1-release-enc.jar 下载地址: 链接: https://pan.baidu.com/s/1VZjklI3qh ...

  7. 本周ASP.NET英文技术文章推荐[02/03 - 02/16]:MVC、Visual Studio 2008、安全性、性能、LINQ to JavaScript、jQuery...

    摘要 继续坚持,继续推荐.本期共有9篇文章: 最新的ASP.NET MVC框架开发计划 Visual Studio 2008 Web开发相关的Hotfix发布 ASP.NET安全性教程系列 ASP.N ...

  8. h5视频和音频 -2018/04/16

    HTML5 规定了一种通过 video 元素来包含视频的标准方法. 当前video元素支持的三种视频格式: (1)Ogg 带有Theora视频编码和Vorbis音频编码的ogg文件 (2)MPEG4带 ...

  9. 2018.02.12 noip模拟赛T2

    二兵的赌注 Description游戏中,二兵要进入了一家奇怪的赌场.赌场中有n个庄家,每个庄家都可以猜大猜小,猜一次一元钱.每一次开彩前,你都可以到任意个庄家那里下赌注.如果开彩结果是大,你就可以得 ...

  10. 【资料下载区】【iCore4相关代码、资料下载地址】更新日期2018/02/24

    [iCore4相关文档][更新中...] iCore4原理图(PDF)下载iCore4引脚注释(PDF)下载iCore4机械尺寸(PDF)下载 [iCore4相关例程代码][ARM] DEMO测试程序 ...

随机推荐

  1. log4j2使用介绍

    工作中,用到了log4j2,以前只接触过log4j,也没有太过深入,这次就稍微系统的学习了以下log4j2. 一.引入pom.xml 使用maven作为项目的构建环境,pom.xml使用slf4j,s ...

  2. FBX SDK在vs 2010下面的配置

    1.下载FBS SDK.地址.因为我是vs2010,所以我下载的是FBX SDK 2016.1.2 VS2010.如果没有了,你可以找博主直接要,QQ1240957820. 2.下载下来的是一个exe ...

  3. ps一些疑问知识点

    PS 的核心, 是 选择, 是 抠图, 不管是蒙版, 通道也好等等, 其实主要的作用还是 抠图. 还是精确地 选出你要处理的 内容对象! 如何改变工具预设? 使用工具预设, 可以将你当前正在使用的 / ...

  4. CF776B Sherlock and his girlfriend

    题目地址 题目链接 题解 这题很有意思啊qwq.本来是写算出每个数的质约数的,然后写到一半发现,质约数互相不影响,有质约数的数肯定是合数. 所以合数染一种色,质数染一种色就好 #include < ...

  5. spring boot + session+redis解决session共享问题

    自己没有亲自试过,不过看了下这个例子感觉靠谱,以后做了测试,在加以说明. PS:后期经验证,上面例子可行.我们平时存session里面的值,直接存在了redis里面了.

  6. PTA 7-2 列车调度(25 分)

    7-2 列车调度(25 分) 火车站的列车调度铁轨的结构如下图所示. 两端分别是一条入口(Entrance)轨道和一条出口(Exit)轨道,它们之间有N条平行的轨道.每趟列车从入口可以选择任意一条轨道 ...

  7. Robot Framework+AutoItLibrary+AutoIt使用

    使用记录: 1. 打开被测桌面程序: 2. 打开AutoIt,用finder tool拖拽到控件上,可以看到控件的信息: 3. 如果空间的Title.Control Info抓不到,可以看Mouse下 ...

  8. Latex: 添加IEEE会议论文作者信息

    参考: Multiple Authors with common affiliations in IEEEtran conference template Latex: 添加IEEE会议论文作者信息 ...

  9. Ubuntu14.04 clang3.8 Installation Guide

    Reference Installing clang 3.8 on Ubuntu 14.04.3. Ubuntu14.04 clang3.8 Installation Guide 1.add the ...

  10. jquery选择器扩展之样式选择器

    https://github.com/wendux/style-selector-jQuery-plugin http://blog.csdn.net/duwen90/article/details/ ...