Browse Source

Source: Avoid out-of-range inputs to std::isspace()

`isspace` takes `int` but documents that the value must be representable
by `unsigned char`, or be EOF.  Use a wrapper to cast to `unsigned char`
to avoid sign extension while converting to `int`.  This generalizes the
fix from commit 5e8c176e2a (cmExecuteProcessCommand: Cast c to unsigned
char before cast to int, 2024-01-05) to other `isspace` call sites.

This was detected by assertions in the MSVC standard library while
processing UTF-8 text.

Issue: #25561
Brad King 1 year ago
parent
commit
d9d9326e14

+ 4 - 5
Source/CTest/cmCTestGIT.cxx

@@ -2,7 +2,6 @@
    file Copyright.txt or https://cmake.org/licensing for details.  */
 #include "cmCTestGIT.h"
 
-#include <cctype>
 #include <cstdio>
 #include <cstdlib>
 #include <ctime>
@@ -414,14 +413,14 @@ protected:
 
   const char* ConsumeSpace(const char* c)
   {
-    while (*c && isspace(*c)) {
+    while (*c && cmIsSpace(*c)) {
       ++c;
     }
     return c;
   }
   const char* ConsumeField(const char* c)
   {
-    while (*c && !isspace(*c)) {
+    while (*c && !cmIsSpace(*c)) {
       ++c;
     }
     return c;
@@ -481,7 +480,7 @@ private:
   {
     // Person Name <person@domain.com> 1234567890 +0000
     const char* c = str;
-    while (*c && isspace(*c)) {
+    while (*c && cmIsSpace(*c)) {
       ++c;
     }
 
@@ -490,7 +489,7 @@ private:
       ++c;
     }
     const char* name_last = c;
-    while (name_last != name_first && isspace(*(name_last - 1))) {
+    while (name_last != name_first && cmIsSpace(*(name_last - 1))) {
       --name_last;
     }
     person.Name.assign(name_first, name_last - name_first);

+ 2 - 2
Source/cmCMakeHostSystemInformationCommand.cxx

@@ -178,7 +178,7 @@ cm::optional<std::pair<std::string, std::string>> ParseOSReleaseLine(
         if (std::isalpha(ch) || ch == '_') {
           key += ch;
           state = PARSE_KEY;
-        } else if (!std::isspace(ch)) {
+        } else if (!cmIsSpace(ch)) {
           state = IGNORE_REST;
         }
         break;
@@ -238,7 +238,7 @@ cm::optional<std::pair<std::string, std::string>> ParseOSReleaseLine(
         break;
 
       case PARSE_VALUE:
-        if (ch == '#' || std::isspace(ch)) {
+        if (ch == '#' || cmIsSpace(ch)) {
           state = IGNORE_REST;
         } else {
           value += ch;

+ 1 - 6
Source/cmExecuteProcessCommand.cxx

@@ -2,7 +2,6 @@
    file Copyright.txt or https://cmake.org/licensing for details.  */
 #include "cmExecuteProcessCommand.h"
 
-#include <cctype> /* isspace */
 #include <cstdint>
 #include <cstdio>
 #include <iostream>
@@ -35,11 +34,7 @@
 namespace {
 bool cmExecuteProcessCommandIsWhitespace(char c)
 {
-  // isspace takes 'int' but documents that the value must be representable
-  // by 'unsigned char', or EOF.  Cast to 'unsigned char' to avoid sign
-  // extension while casting to 'int'.
-  return (isspace(static_cast<int>(static_cast<unsigned char>(c))) ||
-          c == '\n' || c == '\r');
+  return (cmIsSpace(c) || c == '\n' || c == '\r');
 }
 
 void cmExecuteProcessCommandFixText(std::vector<char>& output,

+ 2 - 2
Source/cmMakefile.cxx

@@ -1438,8 +1438,8 @@ static void s_RemoveDefineFlag(std::string const& flag, std::string& dflags)
   for (std::string::size_type lpos = dflags.find(flag, 0);
        lpos != std::string::npos; lpos = dflags.find(flag, lpos)) {
     std::string::size_type rpos = lpos + len;
-    if ((lpos <= 0 || isspace(dflags[lpos - 1])) &&
-        (rpos >= dflags.size() || isspace(dflags[rpos]))) {
+    if ((lpos <= 0 || cmIsSpace(dflags[lpos - 1])) &&
+        (rpos >= dflags.size() || cmIsSpace(dflags[rpos]))) {
       dflags.erase(lpos, len);
     } else {
       ++lpos;

+ 2 - 3
Source/cmRST.cxx

@@ -3,7 +3,6 @@
 #include "cmRST.h"
 
 #include <algorithm>
-#include <cctype>
 #include <cstddef>
 #include <iterator>
 #include <utility>
@@ -159,7 +158,7 @@ void cmRST::ProcessLine(std::string const& line)
   // A line starting in .. is an explicit markup start.
   if (line == ".." ||
       (line.size() >= 3 && line[0] == '.' && line[1] == '.' &&
-       isspace(line[2]))) {
+       cmIsSpace(line[2]))) {
     this->Reset();
     this->MarkupType =
       (line.find_first_not_of(" \t", 2) == std::string::npos ? Markup::Empty
@@ -219,7 +218,7 @@ void cmRST::ProcessLine(std::string const& line)
   }
   // Indented lines following an explicit markup start are explicit markup.
   else if (this->MarkupType != Markup::None &&
-           (line.empty() || isspace(line[0]))) {
+           (line.empty() || cmIsSpace(line[0]))) {
     this->MarkupType = Markup::Normal;
     // Record markup lines if the start line was recorded.
     if (!this->MarkupLines.empty()) {

+ 4 - 1
Source/cmStringAlgorithms.h

@@ -44,7 +44,10 @@ private:
 /** Returns true if the character @a ch is a whitespace character.  **/
 inline bool cmIsSpace(char ch)
 {
-  return ((ch & 0x80) == 0) && std::isspace(ch);
+  // isspace takes 'int' but documents that the value must be representable
+  // by 'unsigned char', or be EOF.  Cast to 'unsigned char' to avoid sign
+  // extension while converting to 'int'.
+  return std::isspace(static_cast<unsigned char>(ch));
 }
 
 /** Returns a string that has whitespace removed from the start and the end. */

+ 1 - 2
Source/cmStringCommand.cxx

@@ -6,7 +6,6 @@
 #include "cmStringCommand.h"
 
 #include <algorithm>
-#include <cctype>
 #include <cstdio>
 #include <cstdlib>
 #include <limits>
@@ -660,7 +659,7 @@ bool HandleStripCommand(std::vector<std::string> const& args,
   const char* ptr = stringValue.c_str();
   size_t cc;
   for (cc = 0; cc < inStringLength; ++cc) {
-    if (!isspace(*ptr)) {
+    if (!cmIsSpace(*ptr)) {
       if (startPos > inStringLength) {
         startPos = cc;
       }

+ 2 - 2
Source/cmSystemTools.cxx

@@ -486,7 +486,7 @@ bool cmSystemTools::SplitProgramFromArgs(std::string const& command,
   const char* c = command.c_str();
 
   // Skip leading whitespace.
-  while (isspace(static_cast<unsigned char>(*c))) {
+  while (cmIsSpace(*c)) {
     ++c;
   }
 
@@ -516,7 +516,7 @@ bool cmSystemTools::SplitProgramFromArgs(std::string const& command,
       in_double = true;
     } else if (*c == '\'') {
       in_single = true;
-    } else if (isspace(static_cast<unsigned char>(*c))) {
+    } else if (cmIsSpace(*c)) {
       break;
     } else {
       program += *c;