浏览代码

fix: handle new description

NateScarlet 11 月之前
父节点
当前提交
a7a1065c70
共有 2 个文件被更改,包括 10 次插入和 2 次删除
  1. 5 0
      scripts/description_parsing_cases.json
  2. 5 2
      scripts/fetch.py

+ 5 - 0
scripts/description_parsing_cases.json

@@ -1,4 +1,9 @@
 [
 [
+  {
+    "year": 2024,
+    "description": "1月1日(周三)放假1天,不调休",
+    "expected": [{ "date": "2024-01-01", "isOffDay": true }]
+  },
   {
   {
     "year": 2019,
     "year": 2019,
     "description": "2018年12月30日至2019年1月1日放假调休,共3天。2018年12月29日(星期六)上班。",
     "description": "2018年12月30日至2019年1月1日放假调休,共3天。2018年12月29日(星期六)上班。",

+ 5 - 2
scripts/fetch.py

@@ -277,7 +277,7 @@ class DescriptionParser:
 class SentenceParser:
 class SentenceParser:
     """Parser for holiday shift description sentence."""
     """Parser for holiday shift description sentence."""
 
 
-    def __init__(self, parent: DescriptionParser, sentence):
+    def __init__(self, parent: DescriptionParser, sentence: str):
         self.parent = parent
         self.parent = parent
         self.sentence = sentence
         self.sentence = sentence
 
 
@@ -316,7 +316,8 @@ class SentenceParser:
     def _extract_dates_2(self, value: str) -> Iterator[date]:
     def _extract_dates_2(self, value: str) -> Iterator[date]:
         value = re.sub(r"(.+?)", "", value)
         value = re.sub(r"(.+?)", "", value)
         match = re.findall(
         match = re.findall(
-            r"(?:(\d+)年)?(?:(\d+)月)?(\d+)日(?:至|-|—)(?:(\d+)年)?(?:(\d+)月)?(\d+)日", value
+            r"(?:(\d+)年)?(?:(\d+)月)?(\d+)日(?:至|-|—)(?:(\d+)年)?(?:(\d+)月)?(\d+)日",
+            value,
         )
         )
         for groups in match:
         for groups in match:
             groups = [_cast_int(i) for i in groups]
             groups = [_cast_int(i) for i in groups]
@@ -357,6 +358,8 @@ class SentenceParser:
                 yield i
                 yield i
 
 
     def _parse_rest_1(self):
     def _parse_rest_1(self):
+        if self.sentence.startswith("不"):
+            return
         match = re.match(r"(.+)(放假|补休|调休|公休)+(?:\d+天)?$", self.sentence)
         match = re.match(r"(.+)(放假|补休|调休|公休)+(?:\d+天)?$", self.sentence)
         if match:
         if match:
             for i in self.extract_dates(match.group(1)):
             for i in self.extract_dates(match.group(1)):