|
@@ -68,7 +68,7 @@ utf8iswordbreak( const char* s )
|
|
|
case 0x00A0: /* non-breaking space */
|
|
case 0x00A0: /* non-breaking space */
|
|
|
case 0x3000: /* ideographic space */
|
|
case 0x3000: /* ideographic space */
|
|
|
case 0xFEFF: /* zero-width non-breaking space */
|
|
case 0xFEFF: /* zero-width non-breaking space */
|
|
|
- return 1;
|
|
|
|
|
|
|
+ return 1;
|
|
|
default: break;
|
|
default: break;
|
|
|
}
|
|
}
|
|
|
return 0;
|
|
return 0;
|
|
@@ -77,61 +77,61 @@ utf8iswordbreak( const char* s )
|
|
|
char *
|
|
char *
|
|
|
first_word( char *s )
|
|
first_word( char *s )
|
|
|
{
|
|
{
|
|
|
- if ( s == NULL ) {
|
|
|
|
|
- return( NULL );
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- while ( iswordbreak( s ) ) {
|
|
|
|
|
- if ( *s == '\0' ) {
|
|
|
|
|
- return( NULL );
|
|
|
|
|
- } else {
|
|
|
|
|
- LDAP_UTF8INC( s );
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- return( s );
|
|
|
|
|
|
|
+ if ( s == NULL ) {
|
|
|
|
|
+ return( NULL );
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ while ( iswordbreak( s ) ) {
|
|
|
|
|
+ if ( *s == '\0' ) {
|
|
|
|
|
+ return( NULL );
|
|
|
|
|
+ } else {
|
|
|
|
|
+ LDAP_UTF8INC( s );
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ return( s );
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
char *
|
|
char *
|
|
|
next_word( char *s )
|
|
next_word( char *s )
|
|
|
{
|
|
{
|
|
|
- if ( s == NULL ) {
|
|
|
|
|
- return( NULL );
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- while ( ! iswordbreak( s ) ) {
|
|
|
|
|
- LDAP_UTF8INC( s );
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- while ( iswordbreak( s ) ) {
|
|
|
|
|
- if ( *s == '\0' ) {
|
|
|
|
|
- return( NULL );
|
|
|
|
|
- } else {
|
|
|
|
|
- LDAP_UTF8INC( s );
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- return( s );
|
|
|
|
|
|
|
+ if ( s == NULL ) {
|
|
|
|
|
+ return( NULL );
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ while ( ! iswordbreak( s ) ) {
|
|
|
|
|
+ LDAP_UTF8INC( s );
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ while ( iswordbreak( s ) ) {
|
|
|
|
|
+ if ( *s == '\0' ) {
|
|
|
|
|
+ return( NULL );
|
|
|
|
|
+ } else {
|
|
|
|
|
+ LDAP_UTF8INC( s );
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ return( s );
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
char *
|
|
char *
|
|
|
word_dup( char *w )
|
|
word_dup( char *w )
|
|
|
{
|
|
{
|
|
|
- char *s, *ret;
|
|
|
|
|
- char save;
|
|
|
|
|
|
|
+ char *s, *ret;
|
|
|
|
|
+ char save;
|
|
|
|
|
|
|
|
- for ( s = w; !iswordbreak( s ); LDAP_UTF8INC( s ))
|
|
|
|
|
- ; /* NULL */
|
|
|
|
|
- save = *s;
|
|
|
|
|
- *s = '\0';
|
|
|
|
|
- ret = slapi_ch_strdup( w );
|
|
|
|
|
- *s = save;
|
|
|
|
|
|
|
+ for ( s = w; !iswordbreak( s ); LDAP_UTF8INC( s ))
|
|
|
|
|
+ ; /* NULL */
|
|
|
|
|
+ save = *s;
|
|
|
|
|
+ *s = '\0';
|
|
|
|
|
+ ret = slapi_ch_strdup( w );
|
|
|
|
|
+ *s = save;
|
|
|
|
|
|
|
|
- return( ret );
|
|
|
|
|
|
|
+ return( ret );
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
#ifndef MAXPHONEMELEN
|
|
#ifndef MAXPHONEMELEN
|
|
|
-#define MAXPHONEMELEN 4
|
|
|
|
|
|
|
+#define MAXPHONEMELEN 6
|
|
|
#endif
|
|
#endif
|
|
|
|
|
|
|
|
#if defined(SOUNDEX)
|
|
#if defined(SOUNDEX)
|
|
@@ -140,11 +140,11 @@ word_dup( char *w )
|
|
|
char *
|
|
char *
|
|
|
phonetic( char *s )
|
|
phonetic( char *s )
|
|
|
{
|
|
{
|
|
|
- char code, adjacent, ch;
|
|
|
|
|
- char *p;
|
|
|
|
|
- char **c;
|
|
|
|
|
- int i, cmax;
|
|
|
|
|
- char phoneme[MAXPHONEMELEN + 1];
|
|
|
|
|
|
|
+ char code, adjacent, ch;
|
|
|
|
|
+ char *p;
|
|
|
|
|
+ char **c;
|
|
|
|
|
+ int i, cmax;
|
|
|
|
|
+ char phoneme[MAXPHONEMELEN + 1];
|
|
|
|
|
|
|
|
p = s;
|
|
p = s;
|
|
|
if ( p == NULL || *p == '\0' ) {
|
|
if ( p == NULL || *p == '\0' ) {
|
|
@@ -152,18 +152,18 @@ phonetic( char *s )
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
adjacent = '0';
|
|
adjacent = '0';
|
|
|
- phoneme[0] = TOUPPER(*p);
|
|
|
|
|
|
|
+ phoneme[0] = TOUPPER(*p);
|
|
|
|
|
|
|
|
- phoneme[1] = '\0';
|
|
|
|
|
|
|
+ phoneme[1] = '\0';
|
|
|
for ( i = 0; i < 99 && (! iswordbreak(p)); LDAP_UTF8INC( p )) {
|
|
for ( i = 0; i < 99 && (! iswordbreak(p)); LDAP_UTF8INC( p )) {
|
|
|
- ch = TOUPPER (*p);
|
|
|
|
|
|
|
+ ch = TOUPPER (*p);
|
|
|
|
|
|
|
|
code = '0';
|
|
code = '0';
|
|
|
|
|
|
|
|
switch (ch) {
|
|
switch (ch) {
|
|
|
case 'B':
|
|
case 'B':
|
|
|
case 'F':
|
|
case 'F':
|
|
|
- case 'P':
|
|
|
|
|
|
|
+ case 'P':
|
|
|
case 'V':
|
|
case 'V':
|
|
|
code = (adjacent != '1') ? '1' : '0';
|
|
code = (adjacent != '1') ? '1' : '0';
|
|
|
break;
|
|
break;
|
|
@@ -196,18 +196,18 @@ phonetic( char *s )
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
if ( i == 0 ) {
|
|
if ( i == 0 ) {
|
|
|
- adjacent = code;
|
|
|
|
|
- i++;
|
|
|
|
|
- } else if ( code != '0' ) {
|
|
|
|
|
- if ( i == MAXPHONEMELEN )
|
|
|
|
|
- break;
|
|
|
|
|
|
|
+ adjacent = code;
|
|
|
|
|
+ i++;
|
|
|
|
|
+ } else if ( code != '0' ) {
|
|
|
|
|
+ if ( i == MAXPHONEMELEN )
|
|
|
|
|
+ break;
|
|
|
adjacent = phoneme[i] = code;
|
|
adjacent = phoneme[i] = code;
|
|
|
i++;
|
|
i++;
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- if ( i > 0 )
|
|
|
|
|
- phoneme[i] = '\0';
|
|
|
|
|
|
|
+ if ( i > 0 )
|
|
|
|
|
+ phoneme[i] = '\0';
|
|
|
|
|
|
|
|
return( slapi_ch_strdup( phoneme ) );
|
|
return( slapi_ch_strdup( phoneme ) );
|
|
|
}
|
|
}
|
|
@@ -224,274 +224,274 @@ phonetic( char *s )
|
|
|
|
|
|
|
|
/* Character coding array */
|
|
/* Character coding array */
|
|
|
static char vsvfn[26] = {
|
|
static char vsvfn[26] = {
|
|
|
- 1, 16, 4, 16, 9, 2, 4, 16, 9, 2, 0, 2, 2,
|
|
|
|
|
- /* A B C D E F G H I J K L M */
|
|
|
|
|
- 2, 1, 4, 0, 2, 4, 4, 1, 0, 0, 0, 8, 0};
|
|
|
|
|
- /* N O P Q R S T U V W X Y Z */
|
|
|
|
|
|
|
+ 1, 16, 4, 16, 9, 2, 4, 16, 9, 2, 0, 2, 2,
|
|
|
|
|
+ /* A B C D E F G H I J K L M */
|
|
|
|
|
+ 2, 1, 4, 0, 2, 4, 4, 1, 0, 0, 0, 8, 0};
|
|
|
|
|
+ /* N O P Q R S T U V W X Y Z */
|
|
|
|
|
|
|
|
/* Macros to access character coding array */
|
|
/* Macros to access character coding array */
|
|
|
-#define vowel(x) ((x) != '\0' && vsvfn[(x) - 'A'] & 1) /* AEIOU */
|
|
|
|
|
-#define same(x) ((x) != '\0' && vsvfn[(x) - 'A'] & 2) /* FJLMNR */
|
|
|
|
|
-#define varson(x) ((x) != '\0' && vsvfn[(x) - 'A'] & 4) /* CGPST */
|
|
|
|
|
-#define frontv(x) ((x) != '\0' && vsvfn[(x) - 'A'] & 8) /* EIY */
|
|
|
|
|
-#define noghf(x) ((x) != '\0' && vsvfn[(x) - 'A'] & 16) /* BDH */
|
|
|
|
|
|
|
+#define vowel(x) ((x) != '\0' && vsvfn[(x) - 'A'] & 1) /* AEIOU */
|
|
|
|
|
+#define same(x) ((x) != '\0' && vsvfn[(x) - 'A'] & 2) /* FJLMNR */
|
|
|
|
|
+#define varson(x) ((x) != '\0' && vsvfn[(x) - 'A'] & 4) /* CGPST */
|
|
|
|
|
+#define frontv(x) ((x) != '\0' && vsvfn[(x) - 'A'] & 8) /* EIY */
|
|
|
|
|
+#define noghf(x) ((x) != '\0' && vsvfn[(x) - 'A'] & 16) /* BDH */
|
|
|
|
|
|
|
|
char *
|
|
char *
|
|
|
phonetic( char *Word )
|
|
phonetic( char *Word )
|
|
|
{
|
|
{
|
|
|
- char *n, *n_start, *n_end; /* pointers to string */
|
|
|
|
|
- char *metaph_end; /* pointers to metaph */
|
|
|
|
|
- char ntrans[42]; /* word with uppercase letters */
|
|
|
|
|
- int KSflag; /* state flag for X -> KS */
|
|
|
|
|
- char buf[MAXPHONEMELEN + 2];
|
|
|
|
|
- char *Metaph;
|
|
|
|
|
-
|
|
|
|
|
- /*
|
|
|
|
|
- * Copy Word to internal buffer, dropping non-alphabetic characters
|
|
|
|
|
- * and converting to upper case
|
|
|
|
|
- */
|
|
|
|
|
- n = ntrans + 4; n_end = ntrans + 35;
|
|
|
|
|
- while (!iswordbreak( Word ) && n < n_end) {
|
|
|
|
|
- if (isascii(*Word)) {
|
|
|
|
|
- if (isalpha(*Word)) {
|
|
|
|
|
- *n++ = TOUPPER(*Word);
|
|
|
|
|
- }
|
|
|
|
|
- ++Word;
|
|
|
|
|
- } else {
|
|
|
|
|
- auto const size_t len = LDAP_UTF8COPY(n, Word);
|
|
|
|
|
- n += len; Word += len;
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
- Metaph = buf;
|
|
|
|
|
- *Metaph = '\0';
|
|
|
|
|
- if (n == ntrans + 4) {
|
|
|
|
|
- return( slapi_ch_strdup( buf ) ); /* Return if null */
|
|
|
|
|
- }
|
|
|
|
|
- n_end = n; /* Set n_end to end of string */
|
|
|
|
|
-
|
|
|
|
|
- /* ntrans[0] will always be == 0 */
|
|
|
|
|
- ntrans[0] = '\0';
|
|
|
|
|
- ntrans[1] = '\0';
|
|
|
|
|
- ntrans[2] = '\0';
|
|
|
|
|
- ntrans[3] = '\0';
|
|
|
|
|
- *n++ = 0;
|
|
|
|
|
- *n++ = 0;
|
|
|
|
|
- *n++ = 0;
|
|
|
|
|
- *n = 0; /* Pad with nulls */
|
|
|
|
|
- n = ntrans + 4; /* Assign pointer to start */
|
|
|
|
|
-
|
|
|
|
|
- /* Check for PN, KN, GN, AE, WR, WH, and X at start */
|
|
|
|
|
- switch (*n) {
|
|
|
|
|
- case 'P':
|
|
|
|
|
- case 'K':
|
|
|
|
|
- case 'G':
|
|
|
|
|
- /* 'PN', 'KN', 'GN' becomes 'N' */
|
|
|
|
|
- if (*(n + 1) == 'N')
|
|
|
|
|
- *n++ = 0;
|
|
|
|
|
- break;
|
|
|
|
|
- case 'A':
|
|
|
|
|
- /* 'AE' becomes 'E' */
|
|
|
|
|
- if (*(n + 1) == 'E')
|
|
|
|
|
- *n++ = 0;
|
|
|
|
|
- break;
|
|
|
|
|
- case 'W':
|
|
|
|
|
- /* 'WR' becomes 'R', and 'WH' to 'H' */
|
|
|
|
|
- if (*(n + 1) == 'R')
|
|
|
|
|
- *n++ = 0;
|
|
|
|
|
- else if (*(n + 1) == 'H') {
|
|
|
|
|
- *(n + 1) = *n;
|
|
|
|
|
- *n++ = 0;
|
|
|
|
|
- }
|
|
|
|
|
- break;
|
|
|
|
|
- case 'X':
|
|
|
|
|
- /* 'X' becomes 'S' */
|
|
|
|
|
- *n = 'S';
|
|
|
|
|
- break;
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- /*
|
|
|
|
|
- * Now, loop step through string, stopping at end of string or when
|
|
|
|
|
- * the computed 'metaph' is MAXPHONEMELEN characters long
|
|
|
|
|
- */
|
|
|
|
|
-
|
|
|
|
|
- KSflag = 0; /* state flag for KS translation */
|
|
|
|
|
- for (metaph_end = Metaph + MAXPHONEMELEN, n_start = n;
|
|
|
|
|
- n <= n_end && Metaph < metaph_end; n++) {
|
|
|
|
|
- if (KSflag) {
|
|
|
|
|
- KSflag = 0;
|
|
|
|
|
- *Metaph++ = 'S';
|
|
|
|
|
- } else if (!isascii(*n)) {
|
|
|
|
|
- *Metaph++ = *n;
|
|
|
|
|
- } else {
|
|
|
|
|
- /* Drop duplicates except for CC */
|
|
|
|
|
- if (*(n - 1) == *n && *n != 'C')
|
|
|
|
|
- continue;
|
|
|
|
|
- /* Check for F J L M N R or first letter vowel */
|
|
|
|
|
- if (same(*n) || (n == n_start && vowel(*n))) {
|
|
|
|
|
- *Metaph++ = *n;
|
|
|
|
|
- } else {
|
|
|
|
|
- switch (*n) {
|
|
|
|
|
- case 'B':
|
|
|
|
|
-
|
|
|
|
|
- /*
|
|
|
|
|
- * B unless in -MB
|
|
|
|
|
- */
|
|
|
|
|
- if (n < (n_end - 1) && *(n - 1) != 'M') {
|
|
|
|
|
- *Metaph++ = *n;
|
|
|
|
|
- }
|
|
|
|
|
- break;
|
|
|
|
|
- case 'C':
|
|
|
|
|
-
|
|
|
|
|
- /*
|
|
|
|
|
- * X if in -CIA-, -CH- else S if in
|
|
|
|
|
- * -CI-, -CE-, -CY- else dropped if
|
|
|
|
|
- * in -SCI-, -SCE-, -SCY- else K
|
|
|
|
|
- */
|
|
|
|
|
- if (*(n - 1) != 'S' || !frontv(*(n + 1))) {
|
|
|
|
|
- if (*(n + 1) == 'I' && *(n + 2) == 'A') {
|
|
|
|
|
- *Metaph++ = 'X';
|
|
|
|
|
- } else if (frontv(*(n + 1))) {
|
|
|
|
|
- *Metaph++ = 'S';
|
|
|
|
|
- } else if (*(n + 1) == 'H') {
|
|
|
|
|
- *Metaph++ = ((n == n_start && !vowel(*(n + 2)))
|
|
|
|
|
- || *(n - 1) == 'S')
|
|
|
|
|
- ? (char) 'K' : (char) 'X';
|
|
|
|
|
- } else {
|
|
|
|
|
- *Metaph++ = 'K';
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
- break;
|
|
|
|
|
- case 'D':
|
|
|
|
|
-
|
|
|
|
|
- /*
|
|
|
|
|
- * J if in DGE or DGI or DGY else T
|
|
|
|
|
- */
|
|
|
|
|
- *Metaph++ = (*(n + 1) == 'G' && frontv(*(n + 2)))
|
|
|
|
|
- ? (char) 'J' : (char) 'T';
|
|
|
|
|
- break;
|
|
|
|
|
- case 'G':
|
|
|
|
|
-
|
|
|
|
|
- /*
|
|
|
|
|
- * F if in -GH and not B--GH, D--GH,
|
|
|
|
|
- * -H--GH, -H---GH else dropped if
|
|
|
|
|
- * -GNED, -GN, -DGE-, -DGI-, -DGY-
|
|
|
|
|
- * else J if in -GE-, -GI-, -GY- and
|
|
|
|
|
- * not GG else K
|
|
|
|
|
- */
|
|
|
|
|
- if ((*(n + 1) != 'J' || vowel(*(n + 2))) &&
|
|
|
|
|
- (*(n + 1) != 'N' || ((n + 1) < n_end &&
|
|
|
|
|
- (*(n + 2) != 'E' || *(n + 3) != 'D'))) &&
|
|
|
|
|
- (*(n - 1) != 'D' || !frontv(*(n + 1))))
|
|
|
|
|
- *Metaph++ = (frontv(*(n + 1)) &&
|
|
|
|
|
- *(n + 2) != 'G') ? (char) 'G' : (char) 'K';
|
|
|
|
|
- else if (*(n + 1) == 'H' && !noghf(*(n - 3)) &&
|
|
|
|
|
- *(n - 4) != 'H')
|
|
|
|
|
- *Metaph++ = 'F';
|
|
|
|
|
- break;
|
|
|
|
|
- case 'H':
|
|
|
|
|
-
|
|
|
|
|
- /*
|
|
|
|
|
- * H if before a vowel and not after
|
|
|
|
|
- * C, G, P, S, T else dropped
|
|
|
|
|
- */
|
|
|
|
|
- if (!varson(*(n - 1)) && (!vowel(*(n - 1)) ||
|
|
|
|
|
- vowel(*(n + 1))))
|
|
|
|
|
- *Metaph++ = 'H';
|
|
|
|
|
- break;
|
|
|
|
|
- case 'K':
|
|
|
|
|
-
|
|
|
|
|
- /*
|
|
|
|
|
- * dropped if after C else K
|
|
|
|
|
- */
|
|
|
|
|
- if (*(n - 1) != 'C')
|
|
|
|
|
- *Metaph++ = 'K';
|
|
|
|
|
- break;
|
|
|
|
|
- case 'P':
|
|
|
|
|
-
|
|
|
|
|
- /*
|
|
|
|
|
- * F if before H, else P
|
|
|
|
|
- */
|
|
|
|
|
- *Metaph++ = *(n + 1) == 'H' ?
|
|
|
|
|
- (char) 'F' : (char) 'P';
|
|
|
|
|
- break;
|
|
|
|
|
- case 'Q':
|
|
|
|
|
-
|
|
|
|
|
- /*
|
|
|
|
|
- * K
|
|
|
|
|
- */
|
|
|
|
|
- *Metaph++ = 'K';
|
|
|
|
|
- break;
|
|
|
|
|
- case 'S':
|
|
|
|
|
-
|
|
|
|
|
- /*
|
|
|
|
|
- * X in -SH-, -SIO- or -SIA- else S
|
|
|
|
|
- */
|
|
|
|
|
- *Metaph++ = (*(n + 1) == 'H' ||
|
|
|
|
|
- (*(n + 1) == 'I' && (*(n + 2) == 'O' ||
|
|
|
|
|
- *(n + 2) == 'A')))
|
|
|
|
|
- ? (char) 'X' : (char) 'S';
|
|
|
|
|
- break;
|
|
|
|
|
- case 'T':
|
|
|
|
|
-
|
|
|
|
|
- /*
|
|
|
|
|
- * X in -TIA- or -TIO- else 0 (zero)
|
|
|
|
|
- * before H else dropped if in -TCH-
|
|
|
|
|
- * else T
|
|
|
|
|
- */
|
|
|
|
|
- if (*(n + 1) == 'I' && (*(n + 2) == 'O' ||
|
|
|
|
|
- *(n + 2) == 'A'))
|
|
|
|
|
- *Metaph++ = 'X';
|
|
|
|
|
- else if (*(n + 1) == 'H')
|
|
|
|
|
- *Metaph++ = '0';
|
|
|
|
|
- else if (*(n + 1) != 'C' || *(n + 2) != 'H')
|
|
|
|
|
- *Metaph++ = 'T';
|
|
|
|
|
- break;
|
|
|
|
|
- case 'V':
|
|
|
|
|
-
|
|
|
|
|
- /*
|
|
|
|
|
- * F
|
|
|
|
|
- */
|
|
|
|
|
- *Metaph++ = 'F';
|
|
|
|
|
- break;
|
|
|
|
|
- case 'W':
|
|
|
|
|
-
|
|
|
|
|
- /*
|
|
|
|
|
- * W after a vowel, else dropped
|
|
|
|
|
- */
|
|
|
|
|
- case 'Y':
|
|
|
|
|
-
|
|
|
|
|
- /*
|
|
|
|
|
- * Y unless followed by a vowel
|
|
|
|
|
- */
|
|
|
|
|
- if (vowel(*(n + 1)))
|
|
|
|
|
- *Metaph++ = *n;
|
|
|
|
|
- break;
|
|
|
|
|
- case 'X':
|
|
|
|
|
-
|
|
|
|
|
- /*
|
|
|
|
|
- * KS
|
|
|
|
|
- */
|
|
|
|
|
- if (n == n_start)
|
|
|
|
|
- *Metaph++ = 'S';
|
|
|
|
|
- else {
|
|
|
|
|
- *Metaph++ = 'K'; /* Insert K, then S */
|
|
|
|
|
- KSflag = 1;
|
|
|
|
|
- }
|
|
|
|
|
- break;
|
|
|
|
|
- case 'Z':
|
|
|
|
|
-
|
|
|
|
|
- /*
|
|
|
|
|
- * S
|
|
|
|
|
- */
|
|
|
|
|
- *Metaph++ = 'S';
|
|
|
|
|
- break;
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- *Metaph = 0; /* Null terminate */
|
|
|
|
|
- return( slapi_ch_strdup( buf ) );
|
|
|
|
|
|
|
+ char *n, *n_start, *n_end; /* pointers to string */
|
|
|
|
|
+ char *metaph_end; /* pointers to metaph */
|
|
|
|
|
+ char ntrans[42]; /* word with uppercase letters */
|
|
|
|
|
+ int KSflag; /* state flag for X -> KS */
|
|
|
|
|
+ char buf[MAXPHONEMELEN + 2];
|
|
|
|
|
+ char *Metaph;
|
|
|
|
|
+
|
|
|
|
|
+ /*
|
|
|
|
|
+ * Copy Word to internal buffer, dropping non-alphabetic characters
|
|
|
|
|
+ * and converting to upper case
|
|
|
|
|
+ */
|
|
|
|
|
+ n = ntrans + 4; n_end = ntrans + 35;
|
|
|
|
|
+ while (!iswordbreak( Word ) && n < n_end) {
|
|
|
|
|
+ if (isascii(*Word)) {
|
|
|
|
|
+ if (isalpha(*Word)) {
|
|
|
|
|
+ *n++ = TOUPPER(*Word);
|
|
|
|
|
+ }
|
|
|
|
|
+ ++Word;
|
|
|
|
|
+ } else {
|
|
|
|
|
+ auto const size_t len = LDAP_UTF8COPY(n, Word);
|
|
|
|
|
+ n += len; Word += len;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ Metaph = buf;
|
|
|
|
|
+ *Metaph = '\0';
|
|
|
|
|
+ if (n == ntrans + 4) {
|
|
|
|
|
+ return( slapi_ch_strdup( buf ) ); /* Return if null */
|
|
|
|
|
+ }
|
|
|
|
|
+ n_end = n; /* Set n_end to end of string */
|
|
|
|
|
+
|
|
|
|
|
+ /* ntrans[0] will always be == 0 */
|
|
|
|
|
+ ntrans[0] = '\0';
|
|
|
|
|
+ ntrans[1] = '\0';
|
|
|
|
|
+ ntrans[2] = '\0';
|
|
|
|
|
+ ntrans[3] = '\0';
|
|
|
|
|
+ *n++ = 0;
|
|
|
|
|
+ *n++ = 0;
|
|
|
|
|
+ *n++ = 0;
|
|
|
|
|
+ *n = 0; /* Pad with nulls */
|
|
|
|
|
+ n = ntrans + 4; /* Assign pointer to start */
|
|
|
|
|
+
|
|
|
|
|
+ /* Check for PN, KN, GN, AE, WR, WH, and X at start */
|
|
|
|
|
+ switch (*n) {
|
|
|
|
|
+ case 'P':
|
|
|
|
|
+ case 'K':
|
|
|
|
|
+ case 'G':
|
|
|
|
|
+ /* 'PN', 'KN', 'GN' becomes 'N' */
|
|
|
|
|
+ if (*(n + 1) == 'N')
|
|
|
|
|
+ *n++ = 0;
|
|
|
|
|
+ break;
|
|
|
|
|
+ case 'A':
|
|
|
|
|
+ /* 'AE' becomes 'E' */
|
|
|
|
|
+ if (*(n + 1) == 'E')
|
|
|
|
|
+ *n++ = 0;
|
|
|
|
|
+ break;
|
|
|
|
|
+ case 'W':
|
|
|
|
|
+ /* 'WR' becomes 'R', and 'WH' to 'H' */
|
|
|
|
|
+ if (*(n + 1) == 'R')
|
|
|
|
|
+ *n++ = 0;
|
|
|
|
|
+ else if (*(n + 1) == 'H') {
|
|
|
|
|
+ *(n + 1) = *n;
|
|
|
|
|
+ *n++ = 0;
|
|
|
|
|
+ }
|
|
|
|
|
+ break;
|
|
|
|
|
+ case 'X':
|
|
|
|
|
+ /* 'X' becomes 'S' */
|
|
|
|
|
+ *n = 'S';
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /*
|
|
|
|
|
+ * Now, loop step through string, stopping at end of string or when
|
|
|
|
|
+ * the computed 'metaph' is MAXPHONEMELEN characters long
|
|
|
|
|
+ */
|
|
|
|
|
+
|
|
|
|
|
+ KSflag = 0; /* state flag for KS translation */
|
|
|
|
|
+ for (metaph_end = Metaph + MAXPHONEMELEN, n_start = n;
|
|
|
|
|
+ n <= n_end && Metaph < metaph_end; n++) {
|
|
|
|
|
+ if (KSflag) {
|
|
|
|
|
+ KSflag = 0;
|
|
|
|
|
+ *Metaph++ = 'S';
|
|
|
|
|
+ } else if (!isascii(*n)) {
|
|
|
|
|
+ *Metaph++ = *n;
|
|
|
|
|
+ } else {
|
|
|
|
|
+ /* Drop duplicates except for CC */
|
|
|
|
|
+ if (*(n - 1) == *n && *n != 'C')
|
|
|
|
|
+ continue;
|
|
|
|
|
+ /* Check for F J L M N R or first letter vowel */
|
|
|
|
|
+ if (same(*n) || (n == n_start && vowel(*n))) {
|
|
|
|
|
+ *Metaph++ = *n;
|
|
|
|
|
+ } else {
|
|
|
|
|
+ switch (*n) {
|
|
|
|
|
+ case 'B':
|
|
|
|
|
+
|
|
|
|
|
+ /*
|
|
|
|
|
+ * B unless in -MB
|
|
|
|
|
+ */
|
|
|
|
|
+ if (n < (n_end - 1) && *(n - 1) != 'M') {
|
|
|
|
|
+ *Metaph++ = *n;
|
|
|
|
|
+ }
|
|
|
|
|
+ break;
|
|
|
|
|
+ case 'C':
|
|
|
|
|
+
|
|
|
|
|
+ /*
|
|
|
|
|
+ * X if in -CIA-, -CH- else S if in
|
|
|
|
|
+ * -CI-, -CE-, -CY- else dropped if
|
|
|
|
|
+ * in -SCI-, -SCE-, -SCY- else K
|
|
|
|
|
+ */
|
|
|
|
|
+ if (*(n - 1) != 'S' || !frontv(*(n + 1))) {
|
|
|
|
|
+ if (*(n + 1) == 'I' && *(n + 2) == 'A') {
|
|
|
|
|
+ *Metaph++ = 'X';
|
|
|
|
|
+ } else if (frontv(*(n + 1))) {
|
|
|
|
|
+ *Metaph++ = 'S';
|
|
|
|
|
+ } else if (*(n + 1) == 'H') {
|
|
|
|
|
+ *Metaph++ = ((n == n_start && !vowel(*(n + 2)))
|
|
|
|
|
+ || *(n - 1) == 'S')
|
|
|
|
|
+ ? (char) 'K' : (char) 'X';
|
|
|
|
|
+ } else {
|
|
|
|
|
+ *Metaph++ = 'K';
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ break;
|
|
|
|
|
+ case 'D':
|
|
|
|
|
+
|
|
|
|
|
+ /*
|
|
|
|
|
+ * J if in DGE or DGI or DGY else T
|
|
|
|
|
+ */
|
|
|
|
|
+ *Metaph++ = (*(n + 1) == 'G' && frontv(*(n + 2)))
|
|
|
|
|
+ ? (char) 'J' : (char) 'T';
|
|
|
|
|
+ break;
|
|
|
|
|
+ case 'G':
|
|
|
|
|
+
|
|
|
|
|
+ /*
|
|
|
|
|
+ * F if in -GH and not B--GH, D--GH,
|
|
|
|
|
+ * -H--GH, -H---GH else dropped if
|
|
|
|
|
+ * -GNED, -GN, -DGE-, -DGI-, -DGY-
|
|
|
|
|
+ * else J if in -GE-, -GI-, -GY- and
|
|
|
|
|
+ * not GG else K
|
|
|
|
|
+ */
|
|
|
|
|
+ if ((*(n + 1) != 'J' || vowel(*(n + 2))) &&
|
|
|
|
|
+ (*(n + 1) != 'N' || ((n + 1) < n_end &&
|
|
|
|
|
+ (*(n + 2) != 'E' || *(n + 3) != 'D'))) &&
|
|
|
|
|
+ (*(n - 1) != 'D' || !frontv(*(n + 1))))
|
|
|
|
|
+ *Metaph++ = (frontv(*(n + 1)) &&
|
|
|
|
|
+ *(n + 2) != 'G') ? (char) 'G' : (char) 'K';
|
|
|
|
|
+ else if (*(n + 1) == 'H' && !noghf(*(n - 3)) &&
|
|
|
|
|
+ *(n - 4) != 'H')
|
|
|
|
|
+ *Metaph++ = 'F';
|
|
|
|
|
+ break;
|
|
|
|
|
+ case 'H':
|
|
|
|
|
+
|
|
|
|
|
+ /*
|
|
|
|
|
+ * H if before a vowel and not after
|
|
|
|
|
+ * C, G, P, S, T else dropped
|
|
|
|
|
+ */
|
|
|
|
|
+ if (!varson(*(n - 1)) && (!vowel(*(n - 1)) ||
|
|
|
|
|
+ vowel(*(n + 1))))
|
|
|
|
|
+ *Metaph++ = 'H';
|
|
|
|
|
+ break;
|
|
|
|
|
+ case 'K':
|
|
|
|
|
+
|
|
|
|
|
+ /*
|
|
|
|
|
+ * dropped if after C else K
|
|
|
|
|
+ */
|
|
|
|
|
+ if (*(n - 1) != 'C')
|
|
|
|
|
+ *Metaph++ = 'K';
|
|
|
|
|
+ break;
|
|
|
|
|
+ case 'P':
|
|
|
|
|
+
|
|
|
|
|
+ /*
|
|
|
|
|
+ * F if before H, else P
|
|
|
|
|
+ */
|
|
|
|
|
+ *Metaph++ = *(n + 1) == 'H' ?
|
|
|
|
|
+ (char) 'F' : (char) 'P';
|
|
|
|
|
+ break;
|
|
|
|
|
+ case 'Q':
|
|
|
|
|
+
|
|
|
|
|
+ /*
|
|
|
|
|
+ * K
|
|
|
|
|
+ */
|
|
|
|
|
+ *Metaph++ = 'K';
|
|
|
|
|
+ break;
|
|
|
|
|
+ case 'S':
|
|
|
|
|
+
|
|
|
|
|
+ /*
|
|
|
|
|
+ * X in -SH-, -SIO- or -SIA- else S
|
|
|
|
|
+ */
|
|
|
|
|
+ *Metaph++ = (*(n + 1) == 'H' ||
|
|
|
|
|
+ (*(n + 1) == 'I' && (*(n + 2) == 'O' ||
|
|
|
|
|
+ *(n + 2) == 'A')))
|
|
|
|
|
+ ? (char) 'X' : (char) 'S';
|
|
|
|
|
+ break;
|
|
|
|
|
+ case 'T':
|
|
|
|
|
+
|
|
|
|
|
+ /*
|
|
|
|
|
+ * X in -TIA- or -TIO- else 0 (zero)
|
|
|
|
|
+ * before H else dropped if in -TCH-
|
|
|
|
|
+ * else T
|
|
|
|
|
+ */
|
|
|
|
|
+ if (*(n + 1) == 'I' && (*(n + 2) == 'O' ||
|
|
|
|
|
+ *(n + 2) == 'A'))
|
|
|
|
|
+ *Metaph++ = 'X';
|
|
|
|
|
+ else if (*(n + 1) == 'H')
|
|
|
|
|
+ *Metaph++ = '0';
|
|
|
|
|
+ else if (*(n + 1) != 'C' || *(n + 2) != 'H')
|
|
|
|
|
+ *Metaph++ = 'T';
|
|
|
|
|
+ break;
|
|
|
|
|
+ case 'V':
|
|
|
|
|
+
|
|
|
|
|
+ /*
|
|
|
|
|
+ * F
|
|
|
|
|
+ */
|
|
|
|
|
+ *Metaph++ = 'F';
|
|
|
|
|
+ break;
|
|
|
|
|
+ case 'W':
|
|
|
|
|
+
|
|
|
|
|
+ /*
|
|
|
|
|
+ * W after a vowel, else dropped
|
|
|
|
|
+ */
|
|
|
|
|
+ case 'Y':
|
|
|
|
|
+
|
|
|
|
|
+ /*
|
|
|
|
|
+ * Y unless followed by a vowel
|
|
|
|
|
+ */
|
|
|
|
|
+ if (vowel(*(n + 1)))
|
|
|
|
|
+ *Metaph++ = *n;
|
|
|
|
|
+ break;
|
|
|
|
|
+ case 'X':
|
|
|
|
|
+
|
|
|
|
|
+ /*
|
|
|
|
|
+ * KS
|
|
|
|
|
+ */
|
|
|
|
|
+ if (n == n_start)
|
|
|
|
|
+ *Metaph++ = 'S';
|
|
|
|
|
+ else {
|
|
|
|
|
+ *Metaph++ = 'K'; /* Insert K, then S */
|
|
|
|
|
+ KSflag = 1;
|
|
|
|
|
+ }
|
|
|
|
|
+ break;
|
|
|
|
|
+ case 'Z':
|
|
|
|
|
+
|
|
|
|
|
+ /*
|
|
|
|
|
+ * S
|
|
|
|
|
+ */
|
|
|
|
|
+ *Metaph++ = 'S';
|
|
|
|
|
+ break;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ *Metaph = 0; /* Null terminate */
|
|
|
|
|
+ return( slapi_ch_strdup( buf ) );
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
#endif /* METAPHONE */
|
|
#endif /* METAPHONE */
|