fcp-html.js 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476
  1. /**
  2. * Register the "baidu.html" namespace.
  3. */
  4. baidu.namespace.register("baidu.html");
  5. /**
  6. * html相关处理
  7. * @author zhaoxianlie
  8. */
  9. baidu.html = (function(){
  10. /**
  11. * Source code of the current page; starts out as an empty string.
  12. */
  13. var _pageSource = '';
  14. /**
  15. * Detection result set; starts out as null.
  16. */
  17. var _summaryInformation = null;
  /**
   * Reset the detection result set.
   *
   * Replaces `_summaryInformation` with a brand-new object holding the
   * default value of every field the detectors write to.
   */
  var _initSummaryInformation = function () {
    // Counters for deprecated markup
    var htmlBase = {
      HTMLDeprecatedAttribute: {}, // deprecated attributes
      HTMLDeprecatedTag: {} // deprecated tags
    };

    // Doctype / rendering-mode information
    var documentMode = {
      hasDocType: false, // whether a doctype is present
      compatMode: {
        IE: 'Q', // compatMode in IE
        WebKit: 'Q' // compatMode in WebKit
      },
      publicId: '', // publicId of the doctype
      hasComment: false, // whether a comment precedes the doctype
      hasConditionalComment: false, // whether a conditional comment precedes the doctype
      isUnusualDocType: false // whether the doctype is an unusual one
    };

    // DOM statistics
    var dom = {
      IECondComm: [], // all IE conditional comments
      FFNotSptComm: [], // comments Firefox does not support (they contain '--')
      allComm: [], // all comments
      count: 0, // total number of nodes
      invalidInput: {
        count: 0, // number of invalid inputs
        input: [] // the invalid inputs themselves
      },
      maxDepth: {
        xpath: '', // path of the deepest node
        depth: 1 // its depth
      }
    };

    _summaryInformation = {
      HTMLBase: htmlBase,
      documentMode: documentMode,
      DOM: dom,
      title: [], // <title> detection
      LINK: {
        notInHead: [] // <link> tags located outside <head>
      },
      ID: {
        ids: {}, // duplicated ids
        count: 0 // number of duplicated ids
      },
      tagInclude: [], // tag containment findings
      unClosedTags: [], // unclosed tags
      htmlMinified: true, // whether the HTML looks minified
      imgTag: [] // <img> findings (empty src)
    };
  };
  /**
   * Look a tag up in the table of deprecated HTML tags.
   * @param {string} tagName tag name in any letter case
   * @returns {*} the entry stored in HTML_DEPRECATED_TAGS under the
   *     lower-cased name (undefined when there is none)
   */
  var _isHTMLDeprecatedTag = function (tagName) {
    var key = tagName.toLowerCase();
    return HTML_DEPRECATED_TAGS[key];
  };
  /**
   * Tell whether an attribute is deprecated on a given tag.
   * @param {string} tagName  tag to check (any letter case)
   * @param {string} attrName attribute to check (any letter case)
   * @returns {*} the entry found in HTML_DEPRECATED_ATTRIBUTES for the
   *     attribute/tag pair, or the falsy lookup result when the attribute
   *     is not listed
   */
  var _isHTMLDeprecatedAttribute = function (tagName, attrName) {
    var tag = tagName.toLowerCase();
    var perTag = HTML_DEPRECATED_ATTRIBUTES[attrName.toLowerCase()];
    if (!perTag) {
      return perTag;
    }
    return perTag[tag];
  };
  /**
   * Count an element in the result set when its tag is deprecated.
   * @param {Element} element element to inspect
   */
  var _detectDeprecatedTag = function (element) {
    var tag = element.tagName.toLowerCase();
    if (!_isHTMLDeprecatedTag(tag)) {
      return;
    }
    // Per-tag occurrence counters, keyed by lower-cased tag name
    var tally = _summaryInformation.HTMLBase.HTMLDeprecatedTag;
    tally[tag] = tally[tag] || 0;
    tally[tag]++;
  };
  /**
   * Count every deprecated attribute carried by an element.
   *
   * Counters are stored as HTMLDeprecatedAttribute[attrName][tagName].
   * @param {Element} element element to inspect
   */
  var _detectDeprecatedAttribute = function (element) {
    var tag = element.tagName.toLowerCase();
    var attrs = element.attributes;
    var tally = _summaryInformation.HTMLBase.HTMLDeprecatedAttribute;
    var total = attrs.length;
    var idx = 0;
    while (idx < total) {
      var name = attrs[idx].name;
      idx += 1;
      if (!_isHTMLDeprecatedAttribute(tag, name)) {
        continue;
      }
      // Create the per-attribute bucket lazily
      var perTag = tally[name] || (tally[name] = {});
      if (!perTag[tag]) {
        perTag[tag] = 0;
      }
      perTag[tag]++;
    }
  };
  /**
   * Collect every node under a root that passes a NodeFilter mask
   * (text, comment, element nodes, ...).
   * @param {Node} rootNode     node the search starts from
   * @param {number} nodeFilter `whatToShow` mask taken from NodeFilter
   * @returns {Node[]} the matching nodes, in iteration order
   */
  var _getNodes = function (rootNode, nodeFilter) {
    var iterator = document.createNodeIterator(rootNode, nodeFilter, null, false);
    var collected = [];
    for (var current = iterator.nextNode(); current; current = iterator.nextNode()) {
      collected.push(current);
    }
    return collected;
  };
  /**
   * Scan every comment node of the document and record:
   *  - DOM.IECondComm:   comments that look like IE conditional comments,
   *  - DOM.FFNotSptComm: comments containing '--' (not supported by Firefox),
   *  - DOM.allComm:      every comment.
   */
  var _detectIECondComm = function () {
    var nodes = _getNodes(document.documentElement, NodeFilter.SHOW_COMMENT);
    // Conditional comments understood by IE only
    var ieCondCommRegExp = /\[\s*if\s*[^\]][\s\w]*\]/i;
    // Firefox does not accept '--' inside a comment.
    // No `g` flag on purpose: a global regex keeps `lastIndex` between
    // `.test()` calls, which made the check skip comments that followed a hit.
    var ffNotSupportComReg = /--/;
    var dom = _summaryInformation.DOM;
    for (var i = 0, c = nodes.length; i < c; ++i) {
      var text = nodes[i].nodeValue;
      if (ieCondCommRegExp.test(text)) {
        dom.IECondComm.push(text);
      }
      if (ffNotSupportComReg.test(text)) {
        dom.FFNotSptComm.push(text);
      }
      dom.allComm.push(text);
    }
  };
  /**
   * Detect the document mode.
   *
   * Overwrites `_summaryInformation.documentMode` with whatever
   * `baidu.doctype.getDocMode()` returns.
   */
  var _detectCompatMode = function () {
    var docMode = baidu.doctype.getDocMode();
    _summaryInformation.documentMode = docMode;
  };
  /**
   * Record the ids that occur more than once.
   * @param {Object} ids map of id -> number of occurrences
   */
  var _detectDuplicatedID = function (ids) {
    var report = _summaryInformation.ID;
    for (var key in ids) {
      var occurrences = ids[key];
      if (!(occurrences > 1)) {
        continue;
      }
      report.ids[key] = occurrences;
      report['count']++;
    }
  };
  /**
   * Measure the depth of an element and remember it when it is the deepest
   * one seen so far.
   *
   * Walks from `dom` up through its element ancestors, building an HTML
   * snippet that describes the path (tag name plus `#id` or `.class.list`).
   * The result is stored in `_summaryInformation.DOM.maxDepth`.
   *
   * The id and class values come from the inspected page, so they are
   * HTML-escaped before being embedded in the generated markup.
   *
   * @param {Node} dom node to measure; non-element nodes are ignored
   */
  var _detectDomMaxDepth = function (dom) {
    // Only element nodes have a depth worth recording
    if (dom.nodeType !== 1 || !dom.tagName) return;

    // Escape text that is about to be concatenated into HTML
    var escapeHtml = function (text) {
      return String(text)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
    };

    var maxDepth = _summaryInformation.DOM.maxDepth;
    var depth = 0;
    var xpath = [];

    // Climb the ancestor chain; the parameter itself is left untouched
    for (var node = dom; node && node.nodeType === 1; node = node.parentNode) {
      // Anything inside the extension's own UI is ignored altogether
      if (node.id === 'fe-helper-tab-box' || node.id === 'fe-helper-pb-mask') return;

      var tag = node.tagName.toLowerCase();
      // <svg> nodes are skipped: they do not count towards the depth
      if (tag == 'svg') continue;

      var curTag;
      try {
        if (node.id) { // has an id: append it
          curTag = tag + '<span style="color:red;">#' + escapeHtml(node.id) + '</span>';
        } else if (node.className) { // no id but a class: append the class list
          curTag = tag + '<span style="color:green;">.' +
            escapeHtml(node.className.split(/\s+/).join('.')) + '</span>';
        } else { // neither: tag name only
          curTag = tag;
        }
      } catch (e) {
        // Best effort: a node whose label cannot be built (for example a
        // non-string className) is left out of the path and the depth
        continue;
      }
      depth++;
      xpath.unshift(curTag);
    }

    // Keep this path only when it is deeper than the current record
    if (depth > maxDepth.depth) {
      maxDepth.depth = depth;
      maxDepth.xpath = xpath.join('<span style="color:gray;">&gt;</span>');
    }
  };
  /**
   * Walk every element of the page once and run the per-element detectors:
   * deprecated tags, deprecated attributes, maximum DOM depth, and id
   * occurrence counting (handed to `_detectDuplicatedID` at the end).
   * Also stores the total element count in `_summaryInformation.DOM.count`.
   */
  var _scanAllElements = function () {
    // Every element of the document
    var elementList = _getNodes(document.documentElement, NodeFilter.SHOW_ELEMENT);
    // Total number of elements
    _summaryInformation.DOM.count = elementList.length;
    // id -> number of occurrences. A prototype-less object is used so that
    // ids such as "constructor" or "toString" are counted like any other
    // instead of colliding with members inherited from Object.prototype.
    var objDomId = Object.create(null);
    for (var i = 0, len = elementList.length; i < len; ++i) {
      var element = elementList[i];
      // Deprecated tags
      _detectDeprecatedTag(element);
      // Deprecated attributes
      _detectDeprecatedAttribute(element);
      // Maximum depth
      _detectDomMaxDepth(element);
      // Id bookkeeping. The attribute is read directly because the `id`
      // property of a <form> can be shadowed by a control named "id".
      var id = element.getAttribute('id');
      if (id) {
        objDomId[id] = (objDomId[id] || 0) + 1;
      }
    }
    // Report the ids that occur more than once
    _detectDuplicatedID(objDomId);
  };
  /**
   * Find the <link> elements that are not located inside <head> and store
   * them in `_summaryInformation.LINK.notInHead`.
   *
   * Elements are compared by identity, not by `href`: a <link> in the body
   * that shares its href with one in the head is still reported.
   */
  var _detectLink = function () {
    // Every <link> of the page
    var allLink = document.querySelectorAll('link');
    // The <link> elements that live inside <head>
    var inHeadLink = Array.prototype.slice.call(document.querySelectorAll('head link'));
    // Keep the ones that are not among the head links
    var notInHeadLink = Array.prototype.filter.call(allLink, function (link) {
      return inHeadLink.indexOf(link) === -1;
    });
    _summaryInformation.LINK.notInHead = notInHeadLink;
  };
  /**
   * Inspect the <title> elements of the page.
   *
   * Stores in `_summaryInformation.title` one `{dom, isInHead}` entry per
   * <title>, where `isInHead` tells whether that element is one of the
   * titles found inside <head>.
   */
  var _detectTitle = function () {
    var everyTitle = document.querySelectorAll('title');
    var headTitles = document.querySelectorAll('head title');
    var report = [];
    jQuery.each(everyTitle, function (idx, titleNode) {
      var foundInHead = false;
      jQuery.each(headTitles, function (pos, headNode) {
        if (titleNode != headNode) {
          return;
        }
        foundInHead = true;
        // Returning false stops the inner iteration
        return false;
      });
      report.push({
        dom: titleNode,
        isInHead: foundInHead
      });
    });
    _summaryInformation.title = report;
  };
  /**
   * Find the <img> elements whose `src` attribute is present but empty
   * (or whitespace only) and store them in `_summaryInformation.imgTag`.
   *
   * Images without any `src` attribute, such as `<img />`, are not
   * examined: the selector only matches `img[src]`.
   */
  var _detectImgTags = function () {
    var allImgTags = document.querySelectorAll('img[src]');
    // Read the attribute itself rather than parsing outerHTML: a regex over
    // the serialized markup is thrown off by any attribute that follows src.
    var imgTags = Array.prototype.filter.call(allImgTags, function (img) {
      var src = img.getAttribute('src');
      return (src || '').trim() === '';
    });
    _summaryInformation.imgTag = imgTags;
  };
  /**
   * Check input[type=text] and input[type=password] elements: their
   * dimensions must not be set through the `size` attribute.
   *
   * Offending inputs are counted and collected in
   * `_summaryInformation.DOM.invalidInput`.
   */
  var _detectInputBox = function () {
    var boxes = document.querySelectorAll('input[type=text],input[type=password]');
    var report = _summaryInformation.DOM.invalidInput;
    jQuery.each(boxes, function (index, box) {
      if (!box.getAttribute('size')) {
        return;
      }
      report.input.push(box);
      report.count++;
    });
  };
  /**
   * Detect block-level tags that are direct children of inline tags.
   *
   * For every inline/block tag pair the document is queried with
   * `inline>block`; each hit is appended to
   * `_summaryInformation.tagInclude` as `{inline, block}`, both rendered
   * through `getOuterHtmlEllipsis`. `inline` is the hit's own parent, so
   * hits that sit in different containers are reported with the right one.
   */
  var _detectTagIncludeCase = function () {
    var tagInclude = _summaryInformation.tagInclude;
    // Every inline tag ...
    jQuery.each(INLINE_HTML_ELEMENT, function (i, inlineTag) {
      // ... combined with every block tag
      jQuery.each(BLOCK_HTML_ELEMENT, function (j, blockTag) {
        var matches = document.querySelectorAll(inlineTag + '>' + blockTag);
        jQuery.each(matches, function (k, item) {
          tagInclude.push({
            inline: getOuterHtmlEllipsis(item.parentNode), // inline tag that contains the block tag
            block: getOuterHtmlEllipsis(item) // the contained block tag
          });
        });
      });
    });
  };
  /**
   * Detect tags that are never closed.
   *
   * Chrome completes unclosed tags on its own, so markup obtained through
   * innerHTML is already balanced; the analysis therefore runs on the page
   * source held in `_pageSource`. Each finding is stored in
   * `_summaryInformation.unClosedTags` with `<` and `>` turned into
   * entities.
   */
  var _detectTagUnClosed = function () {
    var source = _pageSource;
    // Lexical analysis of the HTML source
    var analyzer = new baidu.htmlAnalytic();
    var unclosed = analyzer.getUnclosedTags(source);
    var idx = 0;
    while (idx < unclosed.length) {
      var markup = unclosed[idx].outerHTML;
      _summaryInformation.unClosedTags.push(
        markup.replace(/</g, '&lt;').replace(/>/g, '&gt;')
      );
      idx += 1;
    }
  };
  /**
   * Guess whether the HTML source has been minified.
   *
   * When the average number of characters per line of `_pageSource` is
   * below 150, `_summaryInformation.htmlMinified` is set to false;
   * otherwise the flag is left as it is.
   */
  var _detectHtmlMinify = function () {
    var lineCount = _pageSource.split(/\n/).length;
    var averagePerLine = _pageSource.length / lineCount;
    if (averagePerLine < 150) {
      _summaryInformation.htmlMinified = false;
    }
  };
  /**
   * Fetch the source code of the current page.
   *
   * Sends a GET_HTML message carrying the page URL without its fragment.
   * When the response arrives, its `content` is stored in `_pageSource`,
   * an HTML_READY message is sent, and `callback` (if any) is invoked.
   * @param {Function} [callback] called with no arguments once the source
   *     has been stored
   */
  var _getPageSource = function (callback) {
    var request = {
      type: MSG_TYPE.GET_HTML,
      link: location.href.split('#')[0]
    };
    var onSource = function (respData) {
      // Keep the source for the detectors that need it
      _pageSource = respData.content;
      // Announce that the HTML is ready
      chrome.extension.sendMessage({
        type: MSG_TYPE.HTML_READY
      });
      if (callback) {
        callback();
      }
    };
    chrome.extension.sendMessage(request, onSource);
  };
  /**
   * Initialise the module by fetching the source of the current page.
   * @param {Function} [callback] forwarded unchanged to `_getPageSource`
   */
  var _init = function (callback) {
    // The page source is the only thing that has to be prepared
    _getPageSource(callback);
  };
  /**
   * Run the whole HTML detection.
   *
   * Resets the result set, runs every detector in a fixed order and then
   * hands the result set to the callback.
   * @param {Function} [callback] invoked as `callback(data)` once all
   *     detectors have run; `data` is `_summaryInformation`. Anything that
   *     is not a function is ignored.
   */
  var _detect = function (callback) {
    var steps = [
      _initSummaryInformation, // reset the result set
      _scanAllElements, // walk every element of the page
      _detectTitle, // <title> tags
      _detectLink, // <link> tags
      _detectImgTags, // <img> tags with src=''
      _detectCompatMode, // compat mode
      _detectIECondComm, // IE conditional comments
      _detectInputBox, // inputs sized through the size attribute
      _detectTagIncludeCase, // inline tags containing block tags
      _detectTagUnClosed, // unclosed tags
      _detectHtmlMinify // whether the HTML is minified
    ];
    for (var idx = 0; idx < steps.length; idx++) {
      var runStep = steps[idx];
      runStep();
    }
    // Deliver the result
    if (typeof callback === 'function') {
      callback.call(null, _summaryInformation);
    }
  };
  417. return {
  418. init : _init,
  419. detect : _detect
  420. };
  421. })();