sync with OpenBSD -current

This commit is contained in:
purplerain 2023-11-22 20:51:44 +00:00
parent 1abf3d5d6c
commit 10cf24ada0
Signed by: purplerain
GPG key ID: F42C07F07E2E35B7
40 changed files with 462 additions and 489 deletions

View file

@ -1,4 +1,4 @@
/* $OpenBSD: b.c,v 1.47 2023/11/15 18:56:53 millert Exp $ */
/* $OpenBSD: b.c,v 1.48 2023/11/22 01:01:21 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved
@ -770,59 +770,6 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */
#define MAX_UTF_BYTES 4 // UTF-8 is up to 4 bytes long
// Read one rune at a time from the given FILE*. Return both
// the bytes and the actual rune.
struct runedata {
int rune;
size_t len;
char bytes[6];
};
struct runedata getrune(FILE *fp)
{
struct runedata result;
int c, i, next;
memset(&result, 0, sizeof(result));
c = getc(fp);
if (c == EOF)
return result; // result.rune == 0 --> EOF
else if (c < 128 || awk_mb_cur_max == 1) {
result.bytes[0] = c;
result.len = 1;
result.rune = c;
return result;
}
// need to get bytes and fill things in
result.bytes[0] = c;
result.len = 1;
next = 1;
for (i = 1; i < MAX_UTF_BYTES; i++) {
c = getc(fp);
if (c == EOF)
break;
result.bytes[next++] = c;
result.len++;
}
// put back any extra input bytes
int actual_len = u8_nextlen(result.bytes);
while (result.len > actual_len) {
ungetc(result.bytes[--result.len], fp);
}
result.bytes[result.len] = '\0';
(void) u8_rune(& result.rune, (uschar *) result.bytes);
return result;
}
/*
* NAME
* fnematch
@ -840,60 +787,76 @@ struct runedata getrune(FILE *fp)
bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
{
char *buf = *pbuf;
char *i, *j, *k, *buf = *pbuf;
int bufsize = *pbufsize;
int i, j, k, ns, s;
struct runedata r;
int c, n, ns, s;
s = pfa->initstat;
patlen = 0;
/*
* All indices relative to buf.
* i <= j <= k <= bufsize
* buf <= i <= j <= k <= buf+bufsize
*
* i: origin of active substring (first byte of first character)
* j: current character (last byte of current character)
* k: destination of next getc()
* i: origin of active substring
* j: current character
* k: destination of the next getc
*/
i = -1, k = 0;
do {
j = i++;
do {
r = getrune(f);
if (r.len == 0) {
r.len = 1; // store NUL byte for EOF
}
j += r.len;
if (j >= bufsize) {
if (!adjbuf(&buf, &bufsize, j+1, quantum, 0, "fnematch"))
FATAL("stream '%.30s...' too long", buf);
}
memcpy(buf + k, r.bytes, r.len);
k += r.len;
if ((ns = get_gototab(pfa, s, r.rune)) != 0)
s = ns;
else
s = cgoto(pfa, s, r.rune);
i = j = k = buf;
if (pfa->out[s]) { /* final state */
patlen = j - i + 1;
if (r.rune == 0) /* don't count $ */
patlen--;
do {
/*
* Call u8_rune with at least MAX_UTF_BYTES ahead in
* the buffer until EOF interferes.
*/
if (k - j < MAX_UTF_BYTES) {
if (k + MAX_UTF_BYTES > buf + bufsize) {
adjbuf(&buf, &bufsize,
bufsize + MAX_UTF_BYTES,
quantum, 0, "fnematch");
}
} while (buf[j] && s != 1);
for (n = MAX_UTF_BYTES ; n > 0; n--) {
*k++ = (c = getc(f)) != EOF ? c : 0;
if (c == EOF) {
if (ferror(f))
FATAL("fnematch: getc error");
break;
}
}
}
j += u8_rune(&c, (uschar *)j);
if ((ns = get_gototab(pfa, s, c)) != 0)
s = ns;
else
s = cgoto(pfa, s, c);
if (pfa->out[s]) { /* final state */
patbeg = i;
patlen = j - i;
if (c == 0) /* don't count $ */
patlen--;
}
if (c && s != 1)
continue; /* origin i still viable, next j */
if (patlen)
break; /* best match found */
/* no match at origin i, next i and start over */
i += u8_rune(&c, (uschar *)i);
if (c == 0)
break; /* no match */
j = i;
s = 2;
if (r.len > 1)
i += r.len - 1; // i incremented around the loop
} while (buf[i] && !patlen);
} while (1);
/* adjbuf() may have relocated a resized buffer. Inform the world. */
*pbuf = buf;
*pbufsize = bufsize;
if (patlen) {
patbeg = buf + i;
/*
* Under no circumstances is the last character fed to
* the automaton part of the match. It is EOF's nullbyte,
@ -905,10 +868,11 @@ bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
* (except for EOF's nullbyte, if present) and null
* terminate the buffer.
*/
for (; r.len > 0; r.len--)
if (buf[--k] && ungetc(buf[k], f) == EOF)
FATAL("unable to ungetc '%c'", buf[k]);
buf[k-patlen] = '\0';
do
if (*--k && ungetc(*k, f) == EOF)
FATAL("unable to ungetc '%c'", *k);
while (k > patbeg + patlen);
*k = '\0';
return true;
}
else