sync with OpenBSD -current
This commit is contained in:
parent
1abf3d5d6c
commit
10cf24ada0
40 changed files with 462 additions and 489 deletions
152
usr.bin/awk/b.c
152
usr.bin/awk/b.c
|
@ -1,4 +1,4 @@
|
|||
/* $OpenBSD: b.c,v 1.47 2023/11/15 18:56:53 millert Exp $ */
|
||||
/* $OpenBSD: b.c,v 1.48 2023/11/22 01:01:21 millert Exp $ */
|
||||
/****************************************************************
|
||||
Copyright (C) Lucent Technologies 1997
|
||||
All Rights Reserved
|
||||
|
@ -770,59 +770,6 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */
|
|||
|
||||
#define MAX_UTF_BYTES 4 // UTF-8 is up to 4 bytes long
|
||||
|
||||
// Read one rune at a time from the given FILE*. Return both
|
||||
// the bytes and the actual rune.
|
||||
|
||||
struct runedata {
|
||||
int rune;
|
||||
size_t len;
|
||||
char bytes[6];
|
||||
};
|
||||
|
||||
struct runedata getrune(FILE *fp)
|
||||
{
|
||||
struct runedata result;
|
||||
int c, i, next;
|
||||
|
||||
memset(&result, 0, sizeof(result));
|
||||
|
||||
c = getc(fp);
|
||||
if (c == EOF)
|
||||
return result; // result.rune == 0 --> EOF
|
||||
else if (c < 128 || awk_mb_cur_max == 1) {
|
||||
result.bytes[0] = c;
|
||||
result.len = 1;
|
||||
result.rune = c;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// need to get bytes and fill things in
|
||||
result.bytes[0] = c;
|
||||
result.len = 1;
|
||||
|
||||
next = 1;
|
||||
for (i = 1; i < MAX_UTF_BYTES; i++) {
|
||||
c = getc(fp);
|
||||
if (c == EOF)
|
||||
break;
|
||||
result.bytes[next++] = c;
|
||||
result.len++;
|
||||
}
|
||||
|
||||
// put back any extra input bytes
|
||||
int actual_len = u8_nextlen(result.bytes);
|
||||
while (result.len > actual_len) {
|
||||
ungetc(result.bytes[--result.len], fp);
|
||||
}
|
||||
|
||||
result.bytes[result.len] = '\0';
|
||||
(void) u8_rune(& result.rune, (uschar *) result.bytes);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* NAME
|
||||
* fnematch
|
||||
|
@ -840,60 +787,76 @@ struct runedata getrune(FILE *fp)
|
|||
|
||||
bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
|
||||
{
|
||||
char *buf = *pbuf;
|
||||
char *i, *j, *k, *buf = *pbuf;
|
||||
int bufsize = *pbufsize;
|
||||
int i, j, k, ns, s;
|
||||
struct runedata r;
|
||||
int c, n, ns, s;
|
||||
|
||||
s = pfa->initstat;
|
||||
patlen = 0;
|
||||
|
||||
/*
|
||||
* All indices relative to buf.
|
||||
* i <= j <= k <= bufsize
|
||||
* buf <= i <= j <= k <= buf+bufsize
|
||||
*
|
||||
* i: origin of active substring (first byte of first character)
|
||||
* j: current character (last byte of current character)
|
||||
* k: destination of next getc()
|
||||
* i: origin of active substring
|
||||
* j: current character
|
||||
* k: destination of the next getc
|
||||
*/
|
||||
i = -1, k = 0;
|
||||
do {
|
||||
j = i++;
|
||||
do {
|
||||
r = getrune(f);
|
||||
if (r.len == 0) {
|
||||
r.len = 1; // store NUL byte for EOF
|
||||
}
|
||||
j += r.len;
|
||||
if (j >= bufsize) {
|
||||
if (!adjbuf(&buf, &bufsize, j+1, quantum, 0, "fnematch"))
|
||||
FATAL("stream '%.30s...' too long", buf);
|
||||
}
|
||||
memcpy(buf + k, r.bytes, r.len);
|
||||
k += r.len;
|
||||
|
||||
if ((ns = get_gototab(pfa, s, r.rune)) != 0)
|
||||
s = ns;
|
||||
else
|
||||
s = cgoto(pfa, s, r.rune);
|
||||
i = j = k = buf;
|
||||
|
||||
if (pfa->out[s]) { /* final state */
|
||||
patlen = j - i + 1;
|
||||
if (r.rune == 0) /* don't count $ */
|
||||
patlen--;
|
||||
do {
|
||||
/*
|
||||
* Call u8_rune with at least MAX_UTF_BYTES ahead in
|
||||
* the buffer until EOF interferes.
|
||||
*/
|
||||
if (k - j < MAX_UTF_BYTES) {
|
||||
if (k + MAX_UTF_BYTES > buf + bufsize) {
|
||||
adjbuf(&buf, &bufsize,
|
||||
bufsize + MAX_UTF_BYTES,
|
||||
quantum, 0, "fnematch");
|
||||
}
|
||||
} while (buf[j] && s != 1);
|
||||
for (n = MAX_UTF_BYTES ; n > 0; n--) {
|
||||
*k++ = (c = getc(f)) != EOF ? c : 0;
|
||||
if (c == EOF) {
|
||||
if (ferror(f))
|
||||
FATAL("fnematch: getc error");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
j += u8_rune(&c, (uschar *)j);
|
||||
|
||||
if ((ns = get_gototab(pfa, s, c)) != 0)
|
||||
s = ns;
|
||||
else
|
||||
s = cgoto(pfa, s, c);
|
||||
|
||||
if (pfa->out[s]) { /* final state */
|
||||
patbeg = i;
|
||||
patlen = j - i;
|
||||
if (c == 0) /* don't count $ */
|
||||
patlen--;
|
||||
}
|
||||
|
||||
if (c && s != 1)
|
||||
continue; /* origin i still viable, next j */
|
||||
if (patlen)
|
||||
break; /* best match found */
|
||||
|
||||
/* no match at origin i, next i and start over */
|
||||
i += u8_rune(&c, (uschar *)i);
|
||||
if (c == 0)
|
||||
break; /* no match */
|
||||
j = i;
|
||||
s = 2;
|
||||
if (r.len > 1)
|
||||
i += r.len - 1; // i incremented around the loop
|
||||
} while (buf[i] && !patlen);
|
||||
} while (1);
|
||||
|
||||
/* adjbuf() may have relocated a resized buffer. Inform the world. */
|
||||
*pbuf = buf;
|
||||
*pbufsize = bufsize;
|
||||
|
||||
if (patlen) {
|
||||
patbeg = buf + i;
|
||||
/*
|
||||
* Under no circumstances is the last character fed to
|
||||
* the automaton part of the match. It is EOF's nullbyte,
|
||||
|
@ -905,10 +868,11 @@ bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
|
|||
* (except for EOF's nullbyte, if present) and null
|
||||
* terminate the buffer.
|
||||
*/
|
||||
for (; r.len > 0; r.len--)
|
||||
if (buf[--k] && ungetc(buf[k], f) == EOF)
|
||||
FATAL("unable to ungetc '%c'", buf[k]);
|
||||
buf[k-patlen] = '\0';
|
||||
do
|
||||
if (*--k && ungetc(*k, f) == EOF)
|
||||
FATAL("unable to ungetc '%c'", *k);
|
||||
while (k > patbeg + patlen);
|
||||
*k = '\0';
|
||||
return true;
|
||||
}
|
||||
else
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue