// fcp-html.js (14 KB)
/**
 * Register the "baidu.html" namespace.
 */
baidu.namespace.register("baidu.html");
  5. /**
  6. * html相关处理
  7. * @author zhaoxianlie
  8. */
  9. baidu.html = (function(){
    /**
     * Source code of the current page (assigned in _getPageSource).
     */
    var _pageSource = '';
    /**
     * Result set of the detection (rebuilt by _initSummaryInformation).
     */
    var _summaryInformation = null;
  18. /**
  19. * 初始化侦测结果
  20. */
  21. var _initSummaryInformation = function(){
  22. _summaryInformation = {
  23. HTMLBase: {
  24. HTMLDeprecatedAttribute: {}, //过期的属性
  25. HTMLDeprecatedTag: {} //过期的标签
  26. },
  27. documentMode: {
  28. hasDocType: false, //是否设置了doctype
  29. compatMode: {
  30. IE: 'Q', //IE中的compatMode
  31. WebKit: 'Q' //Webkit中的compatMode
  32. },
  33. publicId: '', //doctype中的publicId
  34. hasComment: false, //doctype前是否有注释
  35. hasConditionalComment: false, //doctype前是否有条件注释
  36. isUnusualDocType: false //是否为怪异的doctype
  37. },
  38. DOM: {
  39. IECondComm: [], //所有的IE条件注释
  40. FFNotSptComm: [], //Firefox中不支持的注释:不能出现‘--’
  41. allComm:[], //所有的注释
  42. count : 0, //所有节点的数量
  43. invalidInput:{
  44. count:0, //不合法的input数量
  45. input:[] //不合法的input集合
  46. },
  47. maxDepth : {
  48. xpath : '', //xpath
  49. depth : 1 //深度
  50. }
  51. },
  52. title: [], //HTML的title检测
  53. LINK: {
  54. notInHead: [] //不在Head标签内部的Link标签
  55. },
  56. ID: {
  57. ids: {}, //重复的ID
  58. count : 0 //出现重复ID的个数
  59. },
  60. tagInclude : [], //标签的包含关系
  61. unClosedTags : [], //未闭合的标签
  62. htmlMinified : true , //HTML是否压缩过
  63. imgTag : [] //Img标签的检测,src为空否
  64. };
  65. };
  66. /**
  67. * 检测某个标签是否为过时的标签
  68. * @param {Object} tagName
  69. */
  70. var _isHTMLDeprecatedTag = function(tagName) {
  71. return HTML_DEPRECATED_TAGS[tagName.toLowerCase()];
  72. };
  73. /**
  74. * 判断某个属性是否已过时
  75. * @param {Object} tagName 待检测的标签
  76. * @param {Object} attrName 待检测的属性
  77. */
  78. var _isHTMLDeprecatedAttribute = function(tagName, attrName){
  79. tagName = tagName.toLowerCase();
  80. attrName = attrName.toLowerCase();
  81. return (HTML_DEPRECATED_ATTRIBUTES[attrName] && HTML_DEPRECATED_ATTRIBUTES[attrName][tagName]);
  82. };
  83. /**
  84. * 将检测到的过时标签记录到结果集中
  85. * @param {Object} element
  86. */
  87. var _detectDeprecatedTag = function(element){
  88. var tagName = element.tagName.toLowerCase();
  89. if (_isHTMLDeprecatedTag(tagName)) {
  90. var HTMLDeprecatedTag = _summaryInformation.HTMLBase.HTMLDeprecatedTag;
  91. if (!HTMLDeprecatedTag[tagName]) {
  92. HTMLDeprecatedTag[tagName] = 0;
  93. }
  94. HTMLDeprecatedTag[tagName]++;
  95. }
  96. };
  97. /**
  98. * 将检测到的过时属性记录到结果集中
  99. * @param {Object} element
  100. */
  101. var _detectDeprecatedAttribute = function(element){
  102. var tagName = element.tagName.toLowerCase();
  103. var attributes = element.attributes;
  104. var HTMLDeprecatedAttribute = _summaryInformation.HTMLBase.HTMLDeprecatedAttribute;
  105. for (var j = 0, c = attributes.length; j < c; ++j) {
  106. var attrName = attributes[j].name;
  107. if (_isHTMLDeprecatedAttribute(tagName, attrName)) {
  108. if (!HTMLDeprecatedAttribute[attrName]) {
  109. HTMLDeprecatedAttribute[attrName] = {};
  110. }
  111. if(!HTMLDeprecatedAttribute[attrName][tagName]) {
  112. HTMLDeprecatedAttribute[attrName][tagName] = 0;
  113. }
  114. HTMLDeprecatedAttribute[attrName][tagName]++;
  115. }
  116. }
  117. };
  118. /**
  119. * 获取页面上的符合过滤条件的所有节点,可以是TEXT、COMMENT、HTMLELEMENT等
  120. * @param {Object} rootNode 以该节点作为根节点开始进行搜索
  121. * @param {Integer} nodeFilter 过滤器,从NodeFilter中获得
  122. */
  123. var _getNodes = function(rootNode, nodeFilter){
  124. var nodeIterator = document.createNodeIterator(rootNode, nodeFilter, null, false);
  125. var nodes = [];
  126. var node = nodeIterator.nextNode();
  127. while (node) {
  128. nodes.push(node);
  129. node = nodeIterator.nextNode();
  130. }
  131. return nodes;
  132. };
  133. /**
  134. * 侦测IE条件注释
  135. */
  136. var _detectIECondComm = function(){
  137. var nodes = _getNodes(document.documentElement, NodeFilter.SHOW_COMMENT);
  138. //仅IE支持的注释
  139. var ieCondCommRegExp = /\[\s*if\s*[^\]][\s\w]*\]/i;
  140. //FF的注释中不能出现'--'
  141. var ffNotSupportComReg = /--/g;
  142. for (var i = 0, c = nodes.length; i < c; ++i) {
  143. var currentNode = nodes[i];
  144. if (ieCondCommRegExp.test(currentNode.nodeValue)) {
  145. _summaryInformation.DOM.IECondComm.push(currentNode.nodeValue);
  146. }
  147. if(ffNotSupportComReg.test(currentNode.nodeValue)) {
  148. _summaryInformation.DOM.FFNotSptComm.push(currentNode.nodeValue);
  149. }
  150. _summaryInformation.DOM.allComm.push(currentNode.nodeValue);
  151. }
  152. };
  153. /**
  154. * 侦测documentMode
  155. */
  156. var _detectCompatMode = function() {
  157. _summaryInformation.documentMode = baidu.doctype.getDocMode();
  158. };
  159. /**
  160. * 检测重复的ID
  161. */
  162. var _detectDuplicatedID = function(ids){
  163. var ID = _summaryInformation.ID;
  164. for(var id in ids) {
  165. if(ids[id] > 1) {
  166. ID.ids[id] = ids[id];
  167. ID['count']++;
  168. }
  169. }
  170. };
  171. /**
  172. * 检测页面DOM节点的最大深度
  173. */
  174. var _detectDomMaxDepth = function(dom){
  175. //如果不是html节点,则直接退出
  176. if(dom.nodeType !== 1) return;
  177. //扩展屏蔽
  178. if(dom.id === 'fe-helper-tab-box' || dom.id === 'fe-helper-pb-mask') return;
  179. //最大深度记录
  180. var maxDepth = _summaryInformation.DOM.maxDepth;
  181. var depth = 1;
  182. var curTag = '';
  183. if(dom.id) { //如果该节点有id,则拼接id
  184. curTag = dom.tagName.toLowerCase() + '<span style="color:red;">#' + dom.id + '</span>';
  185. } else if(dom.className) { //没有id,但有class,则拼接class
  186. curTag = dom.tagName.toLowerCase() + '<span style="color:green;">.' + dom.className.split(/\s+/).join('.') + '</span>';
  187. } else { //没有id也没有class,就只要标签名
  188. curTag = dom.tagName.toLowerCase();
  189. }
  190. var xpath = curTag;
  191. //深度遍历
  192. while((dom = dom.parentNode) && dom.nodeType === 1) {
  193. //扩展屏蔽
  194. if(dom.id === 'fe-helper-tab-box' || dom.id === 'fe-helper-pb-mask') return;
  195. if(dom.id) { //如果该节点有id,则拼接id
  196. curTag = dom.tagName.toLowerCase() + '<span style="color:red;">#' + dom.id + '</span>';
  197. } else if(dom.className) { //没有id,但有class,则拼接class
  198. curTag = dom.tagName.toLowerCase() + '<span style="color:green;">.' + dom.className.split(/\s+/).join('.') + '</span>';
  199. } else { //没有id也没有class,就只要标签名
  200. curTag = dom.tagName.toLowerCase();
  201. }
  202. depth++;
  203. xpath = curTag + '<span style="color:gray;">&gt;</span>' + xpath;
  204. }
  205. //判断当前这个dom节点是否为最大深度
  206. if(depth > maxDepth.depth) {
  207. maxDepth.depth = depth;
  208. maxDepth.xpath = xpath;
  209. }
  210. };
  211. /**
  212. * 扫描整个页面的所有元素,侦测并记录结果
  213. */
  214. var _scanAllElements = function(){
  215. //所有节点
  216. var elementList = _getNodes(document.documentElement, NodeFilter.SHOW_ELEMENT);
  217. //所有节点个数
  218. _summaryInformation.DOM.count = elementList.length;
  219. //定义一个对象,用来标记节点的ID,当某一个节点的ID值大于1时,表示ID重复
  220. var objDomId = {};
  221. //页面扫描
  222. for (var i = 0, len = elementList.length; i < len; ++i) {
  223. var element = elementList[i];
  224. //侦测过时的标签
  225. _detectDeprecatedTag(element);
  226. //侦测过时的属性
  227. _detectDeprecatedAttribute(element);
  228. //最大深度检测
  229. _detectDomMaxDepth(element);
  230. //ID记录
  231. if(!!element.id) {
  232. if(!objDomId[element.id]) objDomId[element.id] = 0;
  233. objDomId[element.id]++;
  234. }
  235. }
  236. //侦测重复的ID
  237. _detectDuplicatedID(objDomId);
  238. };
  239. /**
  240. * 检测页面上的link标签
  241. */
  242. var _detectLink = function(){
  243. //获取页面上所有的link标签
  244. var allLink = document.querySelectorAll('link');
  245. //获取head标签内的link标签
  246. var inHeadLink = document.querySelectorAll('head link');
  247. //不在Head标签内的Link
  248. var notInHeadLink = [];
  249. jQuery.each(allLink,function(i,link){
  250. var isNotInHead = true;
  251. jQuery.each(inHeadLink,function(j,temp){
  252. if(link.href == temp.href) {
  253. isNotInHead = false;
  254. }
  255. });
  256. isNotInHead ? notInHeadLink.push(link) : false;
  257. });
  258. //记录未标记在head标签中的link
  259. _summaryInformation.LINK.notInHead = notInHeadLink;
  260. };
  261. /**
  262. * 侦测页面上的title标签
  263. */
  264. var _detectTitle = function(){
  265. var allTitle = document.querySelectorAll('title');
  266. var inHeadTitle = document.querySelectorAll('head title');
  267. var flag = false;
  268. var titles = [];
  269. jQuery.each(allTitle,function(i,t){
  270. flag = false;
  271. jQuery.each(inHeadTitle,function(j,k){
  272. if(t == k) {
  273. flag = true;
  274. return false;
  275. }
  276. });
  277. titles.push({
  278. dom : t,
  279. isInHead : flag
  280. });
  281. });
  282. _summaryInformation.title = titles;
  283. };
  284. /**
  285. * 检测页面上是否存在src未空的img标签
  286. */
  287. var _detectImgTags = function(){
  288. //这里只检测src属性为空的img标签,如果img标签没有设置src属性,如<img />,则跳过检测
  289. var allImgTags = document.querySelectorAll('img[src]');
  290. var imgTags = [];
  291. var reg = /.*src=\"(.*)\".*/;
  292. var arr = [];
  293. jQuery.each(allImgTags,function(i,k){
  294. arr = reg.exec(k.outerHTML);
  295. if(!arr || arr[1].trim() == '') {
  296. imgTags.push(k);
  297. }
  298. });
  299. _summaryInformation.imgTag = imgTags;
  300. };
  301. /**
  302. * 对input[type=text],input[type=password]进行监测
  303. * 不能以size属性来确定其尺寸
  304. */
  305. var _detectInputBox = function(){
  306. var inputBoxs = document.querySelectorAll('input[type=text],input[type=password]');
  307. var invalidInput = _summaryInformation.DOM.invalidInput;
  308. jQuery.each(inputBoxs,function(i,input){
  309. if(input.getAttribute('size')) {
  310. invalidInput.count++;
  311. invalidInput.input.push(input);
  312. }
  313. });
  314. };
  315. /**
  316. * 检测标签的包含情况:是否有inline-tag包含了block-tag
  317. */
  318. var _detectTagIncludeCase = function(){
  319. var tagInclude = _summaryInformation.tagInclude;
  320. var tempArr = null;
  321. var inlineElm = null;
  322. //遍历inline-tag
  323. jQuery.each(INLINE_HTML_ELEMENT,function(i,inlineTag){
  324. //遍历block-tag
  325. jQuery.each(BLOCK_HTML_ELEMENT,function(j,blockTag){
  326. tempArr = document.querySelectorAll(inlineTag + '>' + blockTag);
  327. if(tempArr.length > 0) {
  328. inlineElm = getOuterHtmlEllipsis(tempArr[0].parentNode);
  329. jQuery.each(tempArr,function(k,item){
  330. tagInclude.push({
  331. inline : inlineElm, //包含了block-tag的inline-tag
  332. block : getOuterHtmlEllipsis(item) //被包含的block-tag
  333. });
  334. });
  335. }
  336. });
  337. });
  338. };
  339. /**
  340. * 检测页面上是否有没有闭合的标签
  341. * Chrome会自动补全未闭合的标签,所以通过innerHTML获取到的HTML内容已经是闭合的了
  342. */
  343. var _detectTagUnClosed = function(){
  344. var html = _pageSource;
  345. //开始进行html代码词法分析
  346. var htmlInstance = new baidu.htmlAnalytic();
  347. var rst = htmlInstance.getUnclosedTags(html);
  348. for(var i = 0;i < rst.length;i++){
  349. _summaryInformation.unClosedTags.push(rst[i].outerHTML.replace(/</g,'&lt;').replace(/>/g,'&gt;'));
  350. }
  351. };
  352. /**
  353. * 检测HTML代码是否压缩过
  354. */
  355. var _detectHtmlMinify = function(){
  356. var lines = _pageSource.split(/\n/);
  357. var average_length_perline = _pageSource.length / lines.length;
  358. if (average_length_perline < 150) {
  359. _summaryInformation.htmlMinified = false;
  360. }
  361. };
  362. /**
  363. * 获取本页面的源代码
  364. */
  365. var _getPageSource = function(callback){
  366. chrome.extension.sendMessage({
  367. type : MSG_TYPE.GET_HTML,
  368. link : location.href.split('#')[0]
  369. },function(respData){
  370. //保存源代码
  371. _pageSource = respData.content;
  372. //html就绪
  373. chrome.extension.sendMessage({
  374. type : MSG_TYPE.HTML_READY
  375. });
  376. callback && callback();
  377. });
  378. };
    /**
     * Initialisation: fetches the source code of the current page.
     * @param {Function} callback handed over as is to _getPageSource
     */
    var _init = function(callback){
        // Fetch the source code of this page
        _getPageSource(callback);
    };
  386. /**
  387. * 执行html侦测
  388. * @param {Function} callback 侦测完毕后的回调方法,形如:function(data){}
  389. * @config {Object} data 就是_summaryInformation
  390. */
  391. var _detect = function (callback){
  392. //初始化结果集
  393. _initSummaryInformation();
  394. //扫描整个页面
  395. _scanAllElements();
  396. //侦测title标签
  397. _detectTitle();
  398. //侦测link标签
  399. _detectLink();
  400. //检测页面上的img标签是否src=''
  401. _detectImgTags();
  402. //侦测compatmode
  403. _detectCompatMode();
  404. //侦测IE条件注释
  405. _detectIECondComm();
  406. //问题Input,使用了size来确定其尺寸,不合法
  407. _detectInputBox();
  408. //检测是否有inline-tag包含了block-tag
  409. _detectTagIncludeCase();
  410. //检测未闭合的标签
  411. _detectTagUnClosed();
  412. //检测HTML代码是否压缩过
  413. _detectHtmlMinify();
  414. //执行回调
  415. if(callback && typeof callback == "function") {
  416. callback.call(null,_summaryInformation);
  417. }
  418. };
    // Public interface: init fetches the page source, detect runs the detection
    return {
        init : _init,
        detect : _detect
    };
  423. })();