懒得勤快 6 rokov pred
rodič
commit
db58dadced

+ 37 - 0
Masuit.LuceneEFCore.SearchEngine/Helpers/BaiduAnalysisModel.cs

@@ -0,0 +1,37 @@
+using Newtonsoft.Json;
+using System.Collections.Generic;
+
+namespace Masuit.LuceneEFCore.SearchEngine.Helpers
+{
+    public class BaiduAnalysisModel
+    {
+
+        [JsonProperty("result")]
+        public Result Result { get; set; }
+    }
+
+    public class Result
+    {
+        [JsonProperty("res")]
+        public Res Res { get; set; }
+    }
+
+    public class Res
+    {
+        [JsonProperty("keyword_list")]
+        public List<string> KeywordList { get; set; }
+
+        [JsonProperty("real_title")]
+        public string RealTitle { get; set; }
+
+        [JsonProperty("title")]
+        public string Title { get; set; }
+
+        [JsonProperty("wordpos")]
+        public List<string> Wordpos { get; set; }
+
+        [JsonProperty("wordrank")]
+        public List<string> Wordrank { get; set; }
+    }
+
+}

+ 70 - 14
Masuit.LuceneEFCore.SearchEngine/LuceneIndexSearcher.cs

@@ -4,28 +4,89 @@ using Lucene.Net.Index;
 using Lucene.Net.QueryParsers.Classic;
 using Lucene.Net.Search;
 using Lucene.Net.Store;
+using Masuit.LuceneEFCore.SearchEngine.Helpers;
 using Masuit.LuceneEFCore.SearchEngine.Interfaces;
+using Microsoft.Extensions.Caching.Memory;
+using Newtonsoft.Json;
 using System;
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.Linq;
+using System.Net;
+using System.Net.Http;
+using System.Text.RegularExpressions;
 
 namespace Masuit.LuceneEFCore.SearchEngine
 {
     public class LuceneIndexSearcher : ILuceneIndexSearcher
     {
-        private static Directory directory;
-        private static Analyzer analyzer;
+        private static Directory _directory;
+        private static Analyzer _analyzer;
+        private readonly IMemoryCache _memoryCache;
+        private static readonly HttpClient HttpClient = new HttpClient()
+        {
+            BaseAddress = new Uri("http://zhannei.baidu.com")
+        };
 
         /// <summary>
         /// 构造函数
         /// </summary>
         /// <param name="directory">索引目录</param>
         /// <param name="analyzer">索引分析器</param>
-        public LuceneIndexSearcher(Directory directory, Analyzer analyzer)
+        public LuceneIndexSearcher(Directory directory, Analyzer analyzer, IMemoryCache memoryCache)
         {
-            LuceneIndexSearcher.directory = directory;
-            LuceneIndexSearcher.analyzer = analyzer;
+            _directory = directory;
+            _analyzer = analyzer;
+            _memoryCache = memoryCache;
+        }
+
+        /// <summary>
+        /// 分词
+        /// </summary>
+        /// <param name="keyword"></param>
+        /// <returns></returns>
+        public List<string> CutKeywords(string keyword)
+        {
+            if (_memoryCache.TryGetValue(keyword, out List<string> list))
+            {
+                return list;
+            }
+            var set = new HashSet<string>
+            {
+                keyword
+            };
+            var mc = Regex.Matches(keyword, @"(([A-Z]*[a-z]*)[\d]*)([\u4E00-\u9FA5]+)*((?!\p{P}).)*");
+            foreach (Match m in mc)
+            {
+                set.Add(m.Value);
+                foreach (Group g in m.Groups)
+                {
+                    set.Add(g.Value);
+                }
+            }
+            if (keyword.Length >= 6)
+            {
+                try
+                {
+                    var res = HttpClient.GetAsync($"/api/customsearch/keywords?title={keyword}").Result;
+                    if (res.StatusCode == HttpStatusCode.OK)
+                    {
+                        BaiduAnalysisModel model = JsonConvert.DeserializeObject<BaiduAnalysisModel>(res.Content.ReadAsStringAsync().Result, new JsonSerializerSettings()
+                        {
+                            NullValueHandling = NullValueHandling.Ignore
+                        });
+                        model.Result.Res.KeywordList?.ForEach(s => set.Add(s));
+                    }
+                }
+                catch
+                {
+                    // ignored
+                }
+            }
+            set.RemoveWhere(s => s.Length < 2 || Regex.IsMatch(s, @"^\p{P}.*"));
+            list = set.OrderByDescending(s => s.Length).ToList();
+            _memoryCache.Set(keyword, list);
+            return list;
         }
 
         /// <summary>
@@ -37,12 +98,7 @@ namespace Masuit.LuceneEFCore.SearchEngine
         private BooleanQuery GetFuzzyquery(MultiFieldQueryParser parser, string keywords)
         {
             var finalQuery = new BooleanQuery();
-
-            string[] terms = keywords.Split(new[] //todo:分词
-            {
-                " "
-            }, StringSplitOptions.RemoveEmptyEntries);
-
+            var terms = CutKeywords(keywords);
             foreach (var term in terms)
             {
                 finalQuery.Add(parser.Parse(term.Replace("~", "") + "~"), Occur.MUST);
@@ -61,7 +117,7 @@ namespace Masuit.LuceneEFCore.SearchEngine
             // 结果集
             ILuceneSearchResultCollection results = new LuceneSearchResultCollection();
 
-            using (var reader = DirectoryReader.Open(directory))
+            using (var reader = DirectoryReader.Open(_directory))
             {
                 var searcher = new IndexSearcher(reader);
                 Query query;
@@ -75,13 +131,13 @@ namespace Masuit.LuceneEFCore.SearchEngine
                 if (options.Fields.Count == 1)
                 {
                     // 单字段搜索
-                    QueryParser queryParser = new QueryParser(Lucene.Net.Util.LuceneVersion.LUCENE_48, options.Fields[0], analyzer);
+                    QueryParser queryParser = new QueryParser(Lucene.Net.Util.LuceneVersion.LUCENE_48, options.Fields[0], _analyzer);
                     query = queryParser.Parse(options.Keywords);
                 }
                 else
                 {
                     // 多字段搜索
-                    MultiFieldQueryParser multiFieldQueryParser = new MultiFieldQueryParser(Lucene.Net.Util.LuceneVersion.LUCENE_48, options.Fields.ToArray(), analyzer, options.Boosts);
+                    MultiFieldQueryParser multiFieldQueryParser = new MultiFieldQueryParser(Lucene.Net.Util.LuceneVersion.LUCENE_48, options.Fields.ToArray(), _analyzer, options.Boosts);
                     query = GetFuzzyquery(multiFieldQueryParser, options.Keywords);
                 }
 

+ 6 - 4
Masuit.LuceneEFCore.SearchEngine/SearchEngine.cs

@@ -6,6 +6,7 @@ using Lucene.Net.Store;
 using Masuit.LuceneEFCore.SearchEngine.Helpers;
 using Masuit.LuceneEFCore.SearchEngine.Interfaces;
 using Microsoft.EntityFrameworkCore;
+using Microsoft.Extensions.Caching.Memory;
 using System;
 using System.Collections.Generic;
 using System.Diagnostics;
@@ -31,7 +32,7 @@ namespace Masuit.LuceneEFCore.SearchEngine
         private static LuceneIndexer _indexer;
         private static LuceneIndexSearcher _searcher;
         private static bool isInitialized = false;
-
+        private readonly IMemoryCache _memoryCache;
         /// <summary>
         /// 索引条数
         /// </summary>
@@ -43,14 +44,15 @@ namespace Masuit.LuceneEFCore.SearchEngine
         /// <param name="indexerOptions">索引选项</param>
         /// <param name="context">数据库上下文</param>
         /// <param name="overrideIfExists">是否被覆盖</param>
-        public SearchEngine(LuceneIndexerOptions indexerOptions, TContext context, bool overrideIfExists = false)
+        public SearchEngine(LuceneIndexerOptions indexerOptions, TContext context, IMemoryCache memoryCache, bool overrideIfExists = false)
         {
+            Context = context;
+            _memoryCache = memoryCache;
             if (isInitialized == false || overrideIfExists)
             {
                 InitializeLucene(indexerOptions);
             }
 
-            Context = context;
         }
 
         /// <summary>
@@ -73,7 +75,7 @@ namespace Masuit.LuceneEFCore.SearchEngine
 
             _analyzer = new JieBaAnalyzer(TokenizerMode.Search);
             _indexer = new LuceneIndexer(_directory, _analyzer);
-            _searcher = new LuceneIndexSearcher(_directory, _analyzer);
+            _searcher = new LuceneIndexSearcher(_directory, _analyzer, _memoryCache);
         }
 
         /// <summary>