Browse Source

Use the Public Suffix List (via tldjs) for base domain extraction. Fix #276.

FelisCatus 10 years ago
parent
commit
0ae801e716
3 changed files with 25 additions and 26 deletions
  1. 1 0
      omega-pac/package.json
  2. 19 15
      omega-pac/src/utils.coffee
  3. 5 11
      omega-pac/test/utils.coffee

+ 1 - 0
omega-pac/package.json

@@ -18,6 +18,7 @@
   },
   "dependencies": {
     "ipv6": "beaugunderson/javascript-ipv6",
+    "tldjs": "^1.5.2",
     "uglify-js": "^2.4.15"
   },
   "browser": {

+ 19 - 15
omega-pac/src/utils.coffee

@@ -39,19 +39,23 @@ class AttachedCache
 
 exports.AttachedCache = AttachedCache
 
-exports.getBaseDomain = (domain) ->
-  return domain if domain.indexOf(':') > 0 # IPv6
+tld = require('tldjs')
+
+exports.isIp = (domain) ->
+  return true if domain.indexOf(':') > 0 # IPv6
   lastCharCode = domain.charCodeAt(domain.length - 1)
-  return domain if 48 <= lastCharCode <= 57 # IP address ending with number.
-  segments = domain.split('.')
-  if segments.length <= 2
-    return domain
-  if segments[0] == 'www'
-    segments.shift()
-  len = segments.length
-  if len <= 2
-    return segments.join('.')
-  if segments[len - 2].length <= 2
-    return segments[len - 3] + '.' + segments[len - 2] + '.' + segments[len - 1]
-  else
-    return segments[len - 2] + '.' + segments[len - 1]
+  return true if 48 <= lastCharCode <= 57 # IP address ending with number.
+  return false
+
+exports.getBaseDomain = (domain) ->
+  return domain if exports.isIp(domain)
+  return tld.getDomain(domain) ? domain
+
+exports.wildcardForDomain = (domain) ->
+  return domain if exports.isIp(domain)
+  return '*.' + exports.getBaseDomain(domain)
+
+Url = require('url')
+exports.wildcardForUrl = (url) ->
+  domain = Url.parse(url).hostname
+  return exports.wildcardForDomain(domain)

+ 5 - 11
omega-pac/test/utils.coffee

@@ -10,19 +10,13 @@ describe 'getBaseDomain', ->
     getBaseDomain('example.com').should.equal('example.com')
     getBaseDomain('e.test').should.equal('e.test')
     getBaseDomain('a.b').should.equal('a.b')
-  it 'should ignore the leading www with domains with two or more levels', ->
-    getBaseDomain('www.example.com').should.equal('example.com')
-    getBaseDomain('www.e.test').should.equal('e.test')
-    getBaseDomain('www.a.b').should.equal('a.b')
-  it 'should assume two-segment TLD if len(second segment from last) <= 2', ->
+  it 'should treat two-segment TLD as one component', ->
     getBaseDomain('images.google.co.uk').should.equal('google.co.uk')
     getBaseDomain('images.google.co.jp').should.equal('google.co.jp')
-    getBaseDomain('ab.de.ef.test').should.equal('de.ef.test')
-  it 'should assume one-segment TLD and keep two segments as base otherwise', ->
-    getBaseDomain('subdomain.example.com').should.equal('example.com')
-    getBaseDomain('some.site.example.net').should.equal('example.net')
-    getBaseDomain('some.site.abc.test').should.equal('abc.test')
-    getBaseDomain('ab.de.efg.test').should.equal('efg.test')
+    getBaseDomain('example.com.cn').should.equal('example.com.cn')
+  it 'should not mistake short domains with two-segment TLDs', ->
+    getBaseDomain('a.bc.com').should.equal('bc.com')
+    getBaseDomain('i.t.co').should.equal('t.co')
   it 'should not try to modify IP address literals', ->
     getBaseDomain('127.0.0.1').should.equal('127.0.0.1')
     getBaseDomain('[::1]').should.equal('[::1]')