Browse Source

分词优化

懒得勤快 4 years ago
parent
commit
f057ee416f

+ 1 - 1
Masuit.LuceneEFCore.SearchEngine/LuceneIndexSearcher.cs

@@ -71,7 +71,7 @@ namespace Masuit.LuceneEFCore.SearchEngine
             list.AddRange(Regex.Matches(keyword, @"[\u4e00-\u9fa5]+").Cast<Match>().Select(m => m.Value));//中文
             list.AddRange(Regex.Matches(keyword, @"\p{P}?[A-Z]*[a-z]*[\p{P}|\p{S}]*").Cast<Match>().Select(m => m.Value));//英文单词
             list.AddRange(Regex.Matches(keyword, "([A-z]+)([0-9.]+)").Cast<Match>().SelectMany(m => m.Groups.Cast<Group>().Select(g => g.Value)));//英文+数字
-            list.AddRange(new JiebaSegmenter().CutForSearch(keyword));//结巴分词
+            list.AddRange(new JiebaSegmenter().Cut(keyword, true));//结巴分词
             list.RemoveAll(s => s.Length < 2);
             list = list.Distinct().OrderByDescending(s => s.Length).Take(10).ToList();
             _memoryCache.Set(keyword, list, TimeSpan.FromHours(1));

+ 12 - 12
Masuit.LuceneEFCore.SearchEngine/Masuit.LuceneEFCore.SearchEngine.csproj

@@ -9,7 +9,7 @@
         <Copyright>懒得勤快</Copyright>
         <PackageProjectUrl>https://github.com/ldqk/Masuit.LuceneEFCore.SearchEngine</PackageProjectUrl>
         <PackageId>Masuit.LuceneEFCore.SearchEngine_int</PackageId>
-        <Version>1.1.3</Version>
+        <Version>1.1.4</Version>
         <Configurations>Debug;Release;String版本;Guid版本;Long版本</Configurations>
         <RunAnalyzersDuringBuild>false</RunAnalyzersDuringBuild>
         <RunAnalyzersDuringLiveAnalysis>false</RunAnalyzersDuringLiveAnalysis>
@@ -19,8 +19,8 @@
         <IncludeSymbols>true</IncludeSymbols>
         <SymbolPackageFormat>snupkg</SymbolPackageFormat>
         <LangVersion>9</LangVersion>
-        <FileVersion>1.1.3</FileVersion>
-        <AssemblyVersion>1.1.3</AssemblyVersion>
+        <FileVersion>1.1.4</FileVersion>
+        <AssemblyVersion>1.1.4</AssemblyVersion>
         <EmbedUntrackedSources>true</EmbedUntrackedSources>
         <IncludeSymbols>true</IncludeSymbols>
         <SymbolPackageFormat>snupkg</SymbolPackageFormat>
@@ -32,6 +32,15 @@
         <PackageReference Include="Microsoft.SourceLink.GitHub" Version="1.0.0" PrivateAssets="All" />
         <PackageReference Include="Newtonsoft.Json" Version="13.0.1" />
     </ItemGroup>
+    <ItemGroup Condition=" '$(TargetFramework)' == 'net5'">
+        <PackageReference Include="Microsoft.EntityFrameworkCore" Version="5.0.8" />
+    </ItemGroup>
+    <ItemGroup Condition=" '$(TargetFramework)' == 'netstandard2.1'">
+        <PackageReference Include="Microsoft.EntityFrameworkCore" Version="3.1.9" />
+    </ItemGroup>
+    <ItemGroup Condition=" '$(TargetFramework)' == 'netstandard2.0'">
+        <PackageReference Include="Microsoft.EntityFrameworkCore" Version="2.2.6" />
+    </ItemGroup>
 
     <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
         <DefineConstants>TRACE;Int</DefineConstants>
@@ -61,15 +70,6 @@
         <Optimize>true</Optimize>
         <DocumentationFile>.\Masuit.LuceneEFCore.SearchEngine.xml</DocumentationFile>
     </PropertyGroup>
-    <ItemGroup Condition=" '$(TargetFramework)' == 'net5'">
-        <PackageReference Include="Microsoft.EntityFrameworkCore" Version="5.0.7" />
-    </ItemGroup>
-    <ItemGroup Condition=" '$(TargetFramework)' == 'netstandard2.1'">
-        <PackageReference Include="Microsoft.EntityFrameworkCore" Version="3.1.9" />
-    </ItemGroup>
-    <ItemGroup Condition=" '$(TargetFramework)' == 'netstandard2.0'">
-        <PackageReference Include="Microsoft.EntityFrameworkCore" Version="2.2.6" />
-    </ItemGroup>
     <ItemGroup>
         <None Update="Masuit.LuceneEFCore.SearchEngine.xml">
             <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>

+ 5 - 1
WebSearchDemo/Startup.cs

@@ -1,4 +1,5 @@
-using Masuit.LuceneEFCore.SearchEngine;
+using JiebaNet.Segmenter;
+using Masuit.LuceneEFCore.SearchEngine;
 using Masuit.LuceneEFCore.SearchEngine.Extensions;
 using Masuit.LuceneEFCore.SearchEngine.Interfaces;
 using Microsoft.AspNetCore.Builder;
@@ -60,6 +61,9 @@ namespace WebSearchDemo
             {
                 app.UseDeveloperExceptionPage();
             }
+            new JiebaSegmenter().AddWord("会声会影"); //添加自定义词库
+            new JiebaSegmenter().AddWord("思杰马克丁"); //添加自定义词库
+            new JiebaSegmenter().AddWord("TeamViewer"); //添加自定义词库
             db.Post.AddRange(JsonConvert.DeserializeObject<List<Post>>(File.ReadAllText(AppContext.BaseDirectory + "Posts.json")));
             searchEngine.CreateIndex(new List<string>()
             {

+ 3 - 3
WebSearchDemo/WebSearchDemo.csproj

@@ -13,9 +13,9 @@
   </PropertyGroup>
 
   <ItemGroup>
-    <PackageReference Include="Microsoft.AspNetCore.Mvc.NewtonsoftJson" Version="5.0.4" />
-    <PackageReference Include="Microsoft.EntityFrameworkCore.InMemory" Version="5.0.4" />
-    <PackageReference Include="Swashbuckle.AspNetCore" Version="6.1.0" />
+    <PackageReference Include="Microsoft.AspNetCore.Mvc.NewtonsoftJson" Version="5.0.8" />
+    <PackageReference Include="Microsoft.EntityFrameworkCore.InMemory" Version="5.0.8" />
+    <PackageReference Include="Swashbuckle.AspNetCore" Version="6.1.5" />
   </ItemGroup>
 
   <ItemGroup>