sync code with last improvements from OpenBSD
This commit is contained in:
parent
f0c5a45f3a
commit
6dffc8ab2a
28 changed files with 2476 additions and 1648 deletions
|
@ -1,4 +1,4 @@
|
||||||
/* $OpenBSD: lapic.c,v 1.70 2023/09/14 19:39:47 cheloha Exp $ */
|
/* $OpenBSD: lapic.c,v 1.71 2023/09/17 14:50:50 cheloha Exp $ */
|
||||||
/* $NetBSD: lapic.c,v 1.2 2003/05/08 01:04:35 fvdl Exp $ */
|
/* $NetBSD: lapic.c,v 1.2 2003/05/08 01:04:35 fvdl Exp $ */
|
||||||
|
|
||||||
/*-
|
/*-
|
||||||
|
@ -499,7 +499,6 @@ lapic_initclocks(void)
|
||||||
stathz = hz;
|
stathz = hz;
|
||||||
profhz = stathz * 10;
|
profhz = stathz * 10;
|
||||||
statclock_is_randomized = 1;
|
statclock_is_randomized = 1;
|
||||||
clockintr_init(0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* $OpenBSD: clock.c,v 1.41 2023/08/23 01:55:46 cheloha Exp $ */
|
/* $OpenBSD: clock.c,v 1.42 2023/09/17 14:50:50 cheloha Exp $ */
|
||||||
/* $NetBSD: clock.c,v 1.1 2003/04/26 18:39:50 fvdl Exp $ */
|
/* $NetBSD: clock.c,v 1.1 2003/04/26 18:39:50 fvdl Exp $ */
|
||||||
|
|
||||||
/*-
|
/*-
|
||||||
|
@ -283,7 +283,6 @@ i8254_initclocks(void)
|
||||||
|
|
||||||
stathz = 128;
|
stathz = 128;
|
||||||
profhz = 1024; /* XXX does not divide into 1 billion */
|
profhz = 1024; /* XXX does not divide into 1 billion */
|
||||||
clockintr_init(0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* $OpenBSD: agtimer.c,v 1.20 2023/09/14 19:39:47 cheloha Exp $ */
|
/* $OpenBSD: agtimer.c,v 1.21 2023/09/17 14:50:51 cheloha Exp $ */
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2011 Dale Rahn <drahn@openbsd.org>
|
* Copyright (c) 2011 Dale Rahn <drahn@openbsd.org>
|
||||||
* Copyright (c) 2013 Patrick Wildt <patrick@blueri.se>
|
* Copyright (c) 2013 Patrick Wildt <patrick@blueri.se>
|
||||||
|
@ -231,7 +231,6 @@ agtimer_cpu_initclocks(void)
|
||||||
stathz = hz;
|
stathz = hz;
|
||||||
profhz = stathz * 10;
|
profhz = stathz * 10;
|
||||||
statclock_is_randomized = 1;
|
statclock_is_randomized = 1;
|
||||||
clockintr_init(0);
|
|
||||||
|
|
||||||
if (sc->sc_ticks_per_second != agtimer_frequency) {
|
if (sc->sc_ticks_per_second != agtimer_frequency) {
|
||||||
agtimer_set_clockrate(agtimer_frequency);
|
agtimer_set_clockrate(agtimer_frequency);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* $OpenBSD: amptimer.c,v 1.19 2023/09/14 19:39:47 cheloha Exp $ */
|
/* $OpenBSD: amptimer.c,v 1.20 2023/09/17 14:50:51 cheloha Exp $ */
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2011 Dale Rahn <drahn@openbsd.org>
|
* Copyright (c) 2011 Dale Rahn <drahn@openbsd.org>
|
||||||
*
|
*
|
||||||
|
@ -288,7 +288,6 @@ amptimer_cpu_initclocks(void)
|
||||||
stathz = hz;
|
stathz = hz;
|
||||||
profhz = hz * 10;
|
profhz = hz * 10;
|
||||||
statclock_is_randomized = 1;
|
statclock_is_randomized = 1;
|
||||||
clockintr_init(0);
|
|
||||||
|
|
||||||
if (sc->sc_ticks_per_second != amptimer_frequency) {
|
if (sc->sc_ticks_per_second != amptimer_frequency) {
|
||||||
amptimer_set_clockrate(amptimer_frequency);
|
amptimer_set_clockrate(amptimer_frequency);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* $OpenBSD: acpipci.c,v 1.40 2023/09/12 08:32:58 jmatthew Exp $ */
|
/* $OpenBSD: acpipci.c,v 1.41 2023/09/16 23:25:16 jmatthew Exp $ */
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2018 Mark Kettenis
|
* Copyright (c) 2018 Mark Kettenis
|
||||||
*
|
*
|
||||||
|
@ -844,7 +844,8 @@ acpipci_iort_map(struct acpi_iort *iort, uint32_t offset, uint32_t id,
|
||||||
itsn = (struct acpi_iort_its_node *)&node[1];
|
itsn = (struct acpi_iort_its_node *)&node[1];
|
||||||
LIST_FOREACH(icl, &interrupt_controllers, ic_list) {
|
LIST_FOREACH(icl, &interrupt_controllers, ic_list) {
|
||||||
for (i = 0; i < itsn->number_of_itss; i++) {
|
for (i = 0; i < itsn->number_of_itss; i++) {
|
||||||
if (icl->ic_gic_its_id == itsn->its_ids[i]) {
|
if (icl->ic_establish_msi != NULL &&
|
||||||
|
icl->ic_gic_its_id == itsn->its_ids[i]) {
|
||||||
*ic = icl;
|
*ic = icl;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* $OpenBSD: agtimer.c,v 1.27 2023/09/14 19:39:47 cheloha Exp $ */
|
/* $OpenBSD: agtimer.c,v 1.28 2023/09/17 14:50:51 cheloha Exp $ */
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2011 Dale Rahn <drahn@openbsd.org>
|
* Copyright (c) 2011 Dale Rahn <drahn@openbsd.org>
|
||||||
* Copyright (c) 2013 Patrick Wildt <patrick@blueri.se>
|
* Copyright (c) 2013 Patrick Wildt <patrick@blueri.se>
|
||||||
|
@ -294,7 +294,6 @@ agtimer_cpu_initclocks(void)
|
||||||
stathz = hz;
|
stathz = hz;
|
||||||
profhz = stathz * 10;
|
profhz = stathz * 10;
|
||||||
statclock_is_randomized = 1;
|
statclock_is_randomized = 1;
|
||||||
clockintr_init(0);
|
|
||||||
|
|
||||||
if (sc->sc_ticks_per_second != agtimer_frequency) {
|
if (sc->sc_ticks_per_second != agtimer_frequency) {
|
||||||
agtimer_set_clockrate(agtimer_frequency);
|
agtimer_set_clockrate(agtimer_frequency);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* $OpenBSD: dmtimer.c,v 1.21 2023/09/14 19:39:47 cheloha Exp $ */
|
/* $OpenBSD: dmtimer.c,v 1.22 2023/09/17 14:50:51 cheloha Exp $ */
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2007,2009 Dale Rahn <drahn@openbsd.org>
|
* Copyright (c) 2007,2009 Dale Rahn <drahn@openbsd.org>
|
||||||
* Copyright (c) 2013 Raphael Graf <r@undefined.ch>
|
* Copyright (c) 2013 Raphael Graf <r@undefined.ch>
|
||||||
|
@ -233,7 +233,6 @@ dmtimer_cpu_initclocks(void)
|
||||||
stathz = hz;
|
stathz = hz;
|
||||||
profhz = stathz * 10;
|
profhz = stathz * 10;
|
||||||
statclock_is_randomized = 1;
|
statclock_is_randomized = 1;
|
||||||
clockintr_init(0);
|
|
||||||
|
|
||||||
sc->sc_ticks_per_second = TIMER_FREQUENCY; /* 32768 */
|
sc->sc_ticks_per_second = TIMER_FREQUENCY; /* 32768 */
|
||||||
sc->sc_nsec_cycle_ratio =
|
sc->sc_nsec_cycle_ratio =
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* $OpenBSD: gptimer.c,v 1.22 2023/09/14 19:39:47 cheloha Exp $ */
|
/* $OpenBSD: gptimer.c,v 1.23 2023/09/17 14:50:51 cheloha Exp $ */
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2007,2009 Dale Rahn <drahn@openbsd.org>
|
* Copyright (c) 2007,2009 Dale Rahn <drahn@openbsd.org>
|
||||||
*
|
*
|
||||||
|
@ -199,7 +199,6 @@ gptimer_cpu_initclocks(void)
|
||||||
stathz = hz;
|
stathz = hz;
|
||||||
profhz = stathz * 10;
|
profhz = stathz * 10;
|
||||||
statclock_is_randomized = 1;
|
statclock_is_randomized = 1;
|
||||||
clockintr_init(0);
|
|
||||||
|
|
||||||
gptimer_nsec_cycle_ratio = TIMER_FREQUENCY * (1ULL << 32) / 1000000000;
|
gptimer_nsec_cycle_ratio = TIMER_FREQUENCY * (1ULL << 32) / 1000000000;
|
||||||
gptimer_nsec_max = UINT64_MAX / gptimer_nsec_cycle_ratio;
|
gptimer_nsec_max = UINT64_MAX / gptimer_nsec_cycle_ratio;
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* $OpenBSD: sxitimer.c,v 1.23 2023/09/14 19:39:47 cheloha Exp $ */
|
/* $OpenBSD: sxitimer.c,v 1.24 2023/09/17 14:50:51 cheloha Exp $ */
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2007,2009 Dale Rahn <drahn@openbsd.org>
|
* Copyright (c) 2007,2009 Dale Rahn <drahn@openbsd.org>
|
||||||
* Copyright (c) 2013 Raphael Graf <r@undefined.ch>
|
* Copyright (c) 2013 Raphael Graf <r@undefined.ch>
|
||||||
|
@ -181,7 +181,6 @@ sxitimer_attach(struct device *parent, struct device *self, void *aux)
|
||||||
stathz = hz;
|
stathz = hz;
|
||||||
profhz = stathz * 10;
|
profhz = stathz * 10;
|
||||||
statclock_is_randomized = 1;
|
statclock_is_randomized = 1;
|
||||||
clockintr_init(0);
|
|
||||||
|
|
||||||
/* stop timer, and set clk src */
|
/* stop timer, and set clk src */
|
||||||
bus_space_write_4(sxitimer_iot, sxitimer_ioh,
|
bus_space_write_4(sxitimer_iot, sxitimer_ioh,
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* $OpenBSD: lapic.c,v 1.57 2023/09/14 19:39:48 cheloha Exp $ */
|
/* $OpenBSD: lapic.c,v 1.58 2023/09/17 14:50:51 cheloha Exp $ */
|
||||||
/* $NetBSD: lapic.c,v 1.1.2.8 2000/02/23 06:10:50 sommerfeld Exp $ */
|
/* $NetBSD: lapic.c,v 1.1.2.8 2000/02/23 06:10:50 sommerfeld Exp $ */
|
||||||
|
|
||||||
/*-
|
/*-
|
||||||
|
@ -327,7 +327,6 @@ lapic_initclocks(void)
|
||||||
stathz = hz;
|
stathz = hz;
|
||||||
profhz = stathz * 10;
|
profhz = stathz * 10;
|
||||||
statclock_is_randomized = 1;
|
statclock_is_randomized = 1;
|
||||||
clockintr_init(0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int gettick(void); /* XXX put in header file */
|
extern int gettick(void); /* XXX put in header file */
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* $OpenBSD: clock.c,v 1.67 2023/08/23 01:55:46 cheloha Exp $ */
|
/* $OpenBSD: clock.c,v 1.68 2023/09/17 14:50:51 cheloha Exp $ */
|
||||||
/* $NetBSD: clock.c,v 1.39 1996/05/12 23:11:54 mycroft Exp $ */
|
/* $NetBSD: clock.c,v 1.39 1996/05/12 23:11:54 mycroft Exp $ */
|
||||||
|
|
||||||
/*-
|
/*-
|
||||||
|
@ -426,7 +426,6 @@ i8254_initclocks(void)
|
||||||
|
|
||||||
stathz = 128;
|
stathz = 128;
|
||||||
profhz = 1024; /* XXX does not divide into 1 billion */
|
profhz = 1024; /* XXX does not divide into 1 billion */
|
||||||
clockintr_init(0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* $OpenBSD: kern_clockintr.c,v 1.53 2023/09/15 11:48:49 deraadt Exp $ */
|
/* $OpenBSD: kern_clockintr.c,v 1.56 2023/09/17 15:24:35 cheloha Exp $ */
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2003 Dale Rahn <drahn@openbsd.org>
|
* Copyright (c) 2003 Dale Rahn <drahn@openbsd.org>
|
||||||
* Copyright (c) 2020 Mark Kettenis <kettenis@openbsd.org>
|
* Copyright (c) 2020 Mark Kettenis <kettenis@openbsd.org>
|
||||||
|
@ -31,13 +31,6 @@
|
||||||
#include <sys/sysctl.h>
|
#include <sys/sysctl.h>
|
||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
|
|
||||||
/*
|
|
||||||
* Protection for global variables in this file:
|
|
||||||
*
|
|
||||||
* I Immutable after initialization.
|
|
||||||
*/
|
|
||||||
uint32_t clockintr_flags; /* [I] global state + behavior flags */
|
|
||||||
|
|
||||||
void clockintr_hardclock(struct clockintr *, void *, void *);
|
void clockintr_hardclock(struct clockintr *, void *, void *);
|
||||||
void clockintr_schedule(struct clockintr *, uint64_t);
|
void clockintr_schedule(struct clockintr *, uint64_t);
|
||||||
void clockintr_schedule_locked(struct clockintr *, uint64_t);
|
void clockintr_schedule_locked(struct clockintr *, uint64_t);
|
||||||
|
@ -50,19 +43,6 @@ void clockqueue_pend_insert(struct clockintr_queue *, struct clockintr *,
|
||||||
void clockqueue_reset_intrclock(struct clockintr_queue *);
|
void clockqueue_reset_intrclock(struct clockintr_queue *);
|
||||||
uint64_t nsec_advance(uint64_t *, uint64_t, uint64_t);
|
uint64_t nsec_advance(uint64_t *, uint64_t, uint64_t);
|
||||||
|
|
||||||
/*
|
|
||||||
* Initialize global state. Set flags and compute intervals.
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
clockintr_init(uint32_t flags)
|
|
||||||
{
|
|
||||||
KASSERT(CPU_IS_PRIMARY(curcpu()));
|
|
||||||
KASSERT(clockintr_flags == 0);
|
|
||||||
KASSERT(!ISSET(flags, ~CL_FLAG_MASK));
|
|
||||||
|
|
||||||
SET(clockintr_flags, flags | CL_INIT);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Ready the calling CPU for clockintr_dispatch(). If this is our
|
* Ready the calling CPU for clockintr_dispatch(). If this is our
|
||||||
* first time here, install the intrclock, if any, and set necessary
|
* first time here, install the intrclock, if any, and set necessary
|
||||||
|
@ -77,8 +57,6 @@ clockintr_cpu_init(const struct intrclock *ic)
|
||||||
struct schedstate_percpu *spc = &ci->ci_schedstate;
|
struct schedstate_percpu *spc = &ci->ci_schedstate;
|
||||||
int reset_cq_intrclock = 0;
|
int reset_cq_intrclock = 0;
|
||||||
|
|
||||||
KASSERT(ISSET(clockintr_flags, CL_INIT));
|
|
||||||
|
|
||||||
if (ic != NULL)
|
if (ic != NULL)
|
||||||
clockqueue_intrclock_install(cq, ic);
|
clockqueue_intrclock_install(cq, ic);
|
||||||
|
|
||||||
|
@ -355,10 +333,9 @@ clockintr_cancel(struct clockintr *cl)
|
||||||
}
|
}
|
||||||
|
|
||||||
struct clockintr *
|
struct clockintr *
|
||||||
clockintr_establish(void *vci,
|
clockintr_establish(struct cpu_info *ci,
|
||||||
void (*func)(struct clockintr *, void *, void *), void *arg)
|
void (*func)(struct clockintr *, void *, void *), void *arg)
|
||||||
{
|
{
|
||||||
struct cpu_info *ci = vci;
|
|
||||||
struct clockintr *cl;
|
struct clockintr *cl;
|
||||||
struct clockintr_queue *cq = &ci->ci_queue;
|
struct clockintr_queue *cq = &ci->ci_queue;
|
||||||
|
|
||||||
|
@ -370,7 +347,7 @@ clockintr_establish(void *vci,
|
||||||
cl->cl_queue = cq;
|
cl->cl_queue = cq;
|
||||||
|
|
||||||
mtx_enter(&cq->cq_mtx);
|
mtx_enter(&cq->cq_mtx);
|
||||||
TAILQ_INSERT_TAIL(&cq->cq_est, cl, cl_elink);
|
TAILQ_INSERT_TAIL(&cq->cq_all, cl, cl_alink);
|
||||||
mtx_leave(&cq->cq_mtx);
|
mtx_leave(&cq->cq_mtx);
|
||||||
return cl;
|
return cl;
|
||||||
}
|
}
|
||||||
|
@ -443,7 +420,7 @@ clockqueue_init(struct clockintr_queue *cq)
|
||||||
|
|
||||||
cq->cq_shadow.cl_queue = cq;
|
cq->cq_shadow.cl_queue = cq;
|
||||||
mtx_init(&cq->cq_mtx, IPL_CLOCK);
|
mtx_init(&cq->cq_mtx, IPL_CLOCK);
|
||||||
TAILQ_INIT(&cq->cq_est);
|
TAILQ_INIT(&cq->cq_all);
|
||||||
TAILQ_INIT(&cq->cq_pend);
|
TAILQ_INIT(&cq->cq_pend);
|
||||||
cq->cq_gen = 1;
|
cq->cq_gen = 1;
|
||||||
SET(cq->cq_flags, CQ_INIT);
|
SET(cq->cq_flags, CQ_INIT);
|
||||||
|
@ -623,7 +600,7 @@ db_show_clockintr_cpu(struct cpu_info *ci)
|
||||||
db_show_clockintr(cq->cq_running, "run", cpu);
|
db_show_clockintr(cq->cq_running, "run", cpu);
|
||||||
TAILQ_FOREACH(elm, &cq->cq_pend, cl_plink)
|
TAILQ_FOREACH(elm, &cq->cq_pend, cl_plink)
|
||||||
db_show_clockintr(elm, "pend", cpu);
|
db_show_clockintr(elm, "pend", cpu);
|
||||||
TAILQ_FOREACH(elm, &cq->cq_est, cl_elink) {
|
TAILQ_FOREACH(elm, &cq->cq_all, cl_alink) {
|
||||||
if (!ISSET(elm->cl_flags, CLST_PENDING))
|
if (!ISSET(elm->cl_flags, CLST_PENDING))
|
||||||
db_show_clockintr(elm, "idle", cpu);
|
db_show_clockintr(elm, "idle", cpu);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* $OpenBSD: sched_bsd.c,v 1.86 2023/09/10 03:08:05 cheloha Exp $ */
|
/* $OpenBSD: sched_bsd.c,v 1.87 2023/09/17 13:02:24 cheloha Exp $ */
|
||||||
/* $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */
|
/* $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */
|
||||||
|
|
||||||
/*-
|
/*-
|
||||||
|
@ -117,9 +117,9 @@ roundrobin(struct clockintr *cl, void *cf, void *arg)
|
||||||
* 1, 5, and 15 minute intervals.
|
* 1, 5, and 15 minute intervals.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
update_loadavg(void *arg)
|
update_loadavg(void *unused)
|
||||||
{
|
{
|
||||||
struct timeout *to = (struct timeout *)arg;
|
static struct timeout to = TIMEOUT_INITIALIZER(update_loadavg, NULL);
|
||||||
CPU_INFO_ITERATOR cii;
|
CPU_INFO_ITERATOR cii;
|
||||||
struct cpu_info *ci;
|
struct cpu_info *ci;
|
||||||
u_int i, nrun = 0;
|
u_int i, nrun = 0;
|
||||||
|
@ -135,7 +135,7 @@ update_loadavg(void *arg)
|
||||||
nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
|
nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
|
||||||
}
|
}
|
||||||
|
|
||||||
timeout_add_sec(to, 5);
|
timeout_add_sec(&to, 5);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -227,9 +227,9 @@ fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
|
||||||
* Recompute process priorities, every second.
|
* Recompute process priorities, every second.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
schedcpu(void *arg)
|
schedcpu(void *unused)
|
||||||
{
|
{
|
||||||
struct timeout *to = (struct timeout *)arg;
|
static struct timeout to = TIMEOUT_INITIALIZER(schedcpu, NULL);
|
||||||
fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
|
fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
|
||||||
struct proc *p;
|
struct proc *p;
|
||||||
int s;
|
int s;
|
||||||
|
@ -280,7 +280,7 @@ schedcpu(void *arg)
|
||||||
SCHED_UNLOCK(s);
|
SCHED_UNLOCK(s);
|
||||||
}
|
}
|
||||||
wakeup(&lbolt);
|
wakeup(&lbolt);
|
||||||
timeout_add_sec(to, 1);
|
timeout_add_sec(&to, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -726,23 +726,14 @@ sysctl_hwperfpolicy(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Start the scheduler's periodic timeouts.
|
||||||
|
*/
|
||||||
void
|
void
|
||||||
scheduler_start(void)
|
scheduler_start(void)
|
||||||
{
|
{
|
||||||
static struct timeout schedcpu_to;
|
schedcpu(NULL);
|
||||||
static struct timeout loadavg_to;
|
update_loadavg(NULL);
|
||||||
|
|
||||||
/*
|
|
||||||
* We avoid polluting the global namespace by keeping the scheduler
|
|
||||||
* timeouts static in this function.
|
|
||||||
* We setup the timeout here and kick schedcpu once to make it do
|
|
||||||
* its job.
|
|
||||||
*/
|
|
||||||
timeout_set(&schedcpu_to, schedcpu, &schedcpu_to);
|
|
||||||
timeout_set(&loadavg_to, update_loadavg, &loadavg_to);
|
|
||||||
|
|
||||||
schedcpu(&schedcpu_to);
|
|
||||||
update_loadavg(&loadavg_to);
|
|
||||||
|
|
||||||
#ifndef SMALL_KERNEL
|
#ifndef SMALL_KERNEL
|
||||||
if (perfpolicy == PERFPOL_AUTO)
|
if (perfpolicy == PERFPOL_AUTO)
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* $OpenBSD: clockintr.h,v 1.17 2023/09/15 11:48:48 deraadt Exp $ */
|
/* $OpenBSD: clockintr.h,v 1.20 2023/09/17 15:24:35 cheloha Exp $ */
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2020-2022 Scott Cheloha <cheloha@openbsd.org>
|
* Copyright (c) 2020-2022 Scott Cheloha <cheloha@openbsd.org>
|
||||||
*
|
*
|
||||||
|
@ -35,6 +35,8 @@ struct clockintr_stat {
|
||||||
#include <sys/mutex.h>
|
#include <sys/mutex.h>
|
||||||
#include <sys/queue.h>
|
#include <sys/queue.h>
|
||||||
|
|
||||||
|
struct cpu_info;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Platform API
|
* Platform API
|
||||||
*/
|
*/
|
||||||
|
@ -68,7 +70,7 @@ intrclock_trigger(struct intrclock *ic)
|
||||||
struct clockintr_queue;
|
struct clockintr_queue;
|
||||||
struct clockintr {
|
struct clockintr {
|
||||||
uint64_t cl_expiration; /* [m] dispatch time */
|
uint64_t cl_expiration; /* [m] dispatch time */
|
||||||
TAILQ_ENTRY(clockintr) cl_elink; /* [m] cq_est glue */
|
TAILQ_ENTRY(clockintr) cl_alink; /* [m] cq_all glue */
|
||||||
TAILQ_ENTRY(clockintr) cl_plink; /* [m] cq_pend glue */
|
TAILQ_ENTRY(clockintr) cl_plink; /* [m] cq_pend glue */
|
||||||
void *cl_arg; /* [I] argument */
|
void *cl_arg; /* [I] argument */
|
||||||
void (*cl_func)(struct clockintr *, void *, void *); /* [I] callback */
|
void (*cl_func)(struct clockintr *, void *, void *); /* [I] callback */
|
||||||
|
@ -94,7 +96,7 @@ struct clockintr_queue {
|
||||||
struct clockintr cq_shadow; /* [o] copy of running clockintr */
|
struct clockintr cq_shadow; /* [o] copy of running clockintr */
|
||||||
struct mutex cq_mtx; /* [a] per-queue mutex */
|
struct mutex cq_mtx; /* [a] per-queue mutex */
|
||||||
uint64_t cq_uptime; /* [o] cached uptime */
|
uint64_t cq_uptime; /* [o] cached uptime */
|
||||||
TAILQ_HEAD(, clockintr) cq_est; /* [m] established clockintr list */
|
TAILQ_HEAD(, clockintr) cq_all; /* [m] established clockintr list */
|
||||||
TAILQ_HEAD(, clockintr) cq_pend;/* [m] pending clockintr list */
|
TAILQ_HEAD(, clockintr) cq_pend;/* [m] pending clockintr list */
|
||||||
struct clockintr *cq_running; /* [m] running clockintr */
|
struct clockintr *cq_running; /* [m] running clockintr */
|
||||||
struct clockintr *cq_hardclock; /* [o] hardclock handle */
|
struct clockintr *cq_hardclock; /* [o] hardclock handle */
|
||||||
|
@ -109,16 +111,8 @@ struct clockintr_queue {
|
||||||
#define CQ_INTRCLOCK 0x00000002 /* intrclock installed */
|
#define CQ_INTRCLOCK 0x00000002 /* intrclock installed */
|
||||||
#define CQ_STATE_MASK 0x00000003
|
#define CQ_STATE_MASK 0x00000003
|
||||||
|
|
||||||
/* Global state flags. */
|
|
||||||
#define CL_INIT 0x00000001 /* global init done */
|
|
||||||
#define CL_STATE_MASK 0x00000001
|
|
||||||
|
|
||||||
/* Global behavior flags. */
|
|
||||||
#define CL_FLAG_MASK 0x00000000
|
|
||||||
|
|
||||||
void clockintr_cpu_init(const struct intrclock *);
|
void clockintr_cpu_init(const struct intrclock *);
|
||||||
int clockintr_dispatch(void *);
|
int clockintr_dispatch(void *);
|
||||||
void clockintr_init(uint32_t);
|
|
||||||
void clockintr_trigger(void);
|
void clockintr_trigger(void);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -128,7 +122,7 @@ void clockintr_trigger(void);
|
||||||
uint64_t clockintr_advance(struct clockintr *, uint64_t);
|
uint64_t clockintr_advance(struct clockintr *, uint64_t);
|
||||||
uint64_t clockintr_advance_random(struct clockintr *, uint64_t, uint32_t);
|
uint64_t clockintr_advance_random(struct clockintr *, uint64_t, uint32_t);
|
||||||
void clockintr_cancel(struct clockintr *);
|
void clockintr_cancel(struct clockintr *);
|
||||||
struct clockintr *clockintr_establish(void *,
|
struct clockintr *clockintr_establish(struct cpu_info *,
|
||||||
void (*)(struct clockintr *, void *, void *), void *);
|
void (*)(struct clockintr *, void *, void *), void *);
|
||||||
void clockintr_stagger(struct clockintr *, uint64_t, uint32_t, uint32_t);
|
void clockintr_stagger(struct clockintr *, uint64_t, uint32_t, uint32_t);
|
||||||
void clockqueue_init(struct clockintr_queue *);
|
void clockqueue_init(struct clockintr_queue *);
|
||||||
|
|
1417
usr.bin/awk/FIXES
1417
usr.bin/awk/FIXES
File diff suppressed because it is too large
Load diff
1429
usr.bin/awk/FIXES.1e
Normal file
1429
usr.bin/awk/FIXES.1e
Normal file
File diff suppressed because it is too large
Load diff
23
usr.bin/awk/LICENSE
Normal file
23
usr.bin/awk/LICENSE
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
/****************************************************************
|
||||||
|
Copyright (C) Lucent Technologies 1997
|
||||||
|
All Rights Reserved
|
||||||
|
|
||||||
|
Permission to use, copy, modify, and distribute this software and
|
||||||
|
its documentation for any purpose and without fee is hereby
|
||||||
|
granted, provided that the above copyright notice appear in all
|
||||||
|
copies and that both that the copyright notice and this
|
||||||
|
permission notice and warranty disclaimer appear in supporting
|
||||||
|
documentation, and that the name Lucent Technologies or any of
|
||||||
|
its entities not be used in advertising or publicity pertaining
|
||||||
|
to distribution of the software without specific, written prior
|
||||||
|
permission.
|
||||||
|
|
||||||
|
LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
||||||
|
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
|
||||||
|
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
|
||||||
|
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||||
|
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
|
||||||
|
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
||||||
|
THIS SOFTWARE.
|
||||||
|
****************************************************************/
|
|
@ -1,10 +1,40 @@
|
||||||
$OpenBSD: README.md,v 1.6 2022/01/27 16:58:37 millert Exp $
|
$OpenBSD: README.md,v 1.7 2023/09/17 14:49:44 millert Exp $
|
||||||
|
|
||||||
# The One True Awk
|
# The One True Awk
|
||||||
|
|
||||||
This is the version of `awk` described in _The AWK Programming Language_,
|
This is the version of `awk` described in _The AWK Programming Language_,
|
||||||
by Al Aho, Brian Kernighan, and Peter Weinberger
|
Second Edition, by Al Aho, Brian Kernighan, and Peter Weinberger
|
||||||
(Addison-Wesley, 1988, ISBN 0-201-07981-X).
|
(Addison-Wesley, 2024, ISBN-13 978-0138269722, ISBN-10 0138269726).
|
||||||
|
|
||||||
|
## What's New? ##
|
||||||
|
|
||||||
|
This version of Awk handles UTF-8 and comma-separated values (CSV) input.
|
||||||
|
|
||||||
|
### Strings ###
|
||||||
|
|
||||||
|
Functions that process strings now count Unicode code points, not bytes;
|
||||||
|
this affects `length`, `substr`, `index`, `match`, `split`,
|
||||||
|
`sub`, `gsub`, and others. Note that code
|
||||||
|
points are not necessarily characters.
|
||||||
|
|
||||||
|
UTF-8 sequences may appear in literal strings and regular expressions.
|
||||||
|
Arbitrary characters may be included with `\u` followed by 1 to 8 hexadecimal digits.
|
||||||
|
|
||||||
|
### Regular expressions ###
|
||||||
|
|
||||||
|
Regular expressions may include UTF-8 code points, including `\u`.
|
||||||
|
Character classes are likely to be limited to about 256 characters
|
||||||
|
when expanded.
|
||||||
|
|
||||||
|
### CSV ###
|
||||||
|
|
||||||
|
The option `--csv` turns on CSV processing of input:
|
||||||
|
fields are separated by commas, fields may be quoted with
|
||||||
|
double-quote (`"`) characters, fields may contain embedded newlines.
|
||||||
|
In CSV mode, `FS` is ignored.
|
||||||
|
|
||||||
|
If no explicit separator argument is provided,
|
||||||
|
field-splitting in `split` is determined by CSV mode.
|
||||||
|
|
||||||
## Copyright
|
## Copyright
|
||||||
|
|
||||||
|
@ -69,22 +99,22 @@ The program itself is created by
|
||||||
|
|
||||||
which should produce a sequence of messages roughly like this:
|
which should produce a sequence of messages roughly like this:
|
||||||
|
|
||||||
yacc -d awkgram.y
|
bison -d awkgram.y
|
||||||
conflicts: 43 shift/reduce, 85 reduce/reduce
|
awkgram.y: warning: 44 shift/reduce conflicts [-Wconflicts-sr]
|
||||||
mv y.tab.c ytab.c
|
awkgram.y: warning: 85 reduce/reduce conflicts [-Wconflicts-rr]
|
||||||
mv y.tab.h ytab.h
|
awkgram.y: note: rerun with option '-Wcounterexamples' to generate conflict counterexamples
|
||||||
cc -c ytab.c
|
gcc -g -Wall -pedantic -Wcast-qual -O2 -c -o awkgram.tab.o awkgram.tab.c
|
||||||
cc -c b.c
|
gcc -g -Wall -pedantic -Wcast-qual -O2 -c -o b.o b.c
|
||||||
cc -c main.c
|
gcc -g -Wall -pedantic -Wcast-qual -O2 -c -o main.o main.c
|
||||||
cc -c parse.c
|
gcc -g -Wall -pedantic -Wcast-qual -O2 -c -o parse.o parse.c
|
||||||
cc maketab.c -o maketab
|
gcc -g -Wall -pedantic -Wcast-qual -O2 maketab.c -o maketab
|
||||||
./maketab >proctab.c
|
./maketab awkgram.tab.h >proctab.c
|
||||||
cc -c proctab.c
|
gcc -g -Wall -pedantic -Wcast-qual -O2 -c -o proctab.o proctab.c
|
||||||
cc -c tran.c
|
gcc -g -Wall -pedantic -Wcast-qual -O2 -c -o tran.o tran.c
|
||||||
cc -c lib.c
|
gcc -g -Wall -pedantic -Wcast-qual -O2 -c -o lib.o lib.c
|
||||||
cc -c run.c
|
gcc -g -Wall -pedantic -Wcast-qual -O2 -c -o run.o run.c
|
||||||
cc -c lex.c
|
gcc -g -Wall -pedantic -Wcast-qual -O2 -c -o lex.o lex.c
|
||||||
cc ytab.o b.o main.o parse.o proctab.o tran.o lib.o run.o lex.o -lm
|
gcc -g -Wall -pedantic -Wcast-qual -O2 awkgram.tab.o b.o main.o parse.o proctab.o tran.o lib.o run.o lex.o -lm
|
||||||
|
|
||||||
This produces an executable `a.out`; you will eventually want to
|
This produces an executable `a.out`; you will eventually want to
|
||||||
move this to some place like `/usr/bin/awk`.
|
move this to some place like `/usr/bin/awk`.
|
||||||
|
@ -104,14 +134,9 @@ the standard developer tools.
|
||||||
You can also use `make CC=g++` to build with the GNU C++ compiler,
|
You can also use `make CC=g++` to build with the GNU C++ compiler,
|
||||||
should you choose to do so.
|
should you choose to do so.
|
||||||
|
|
||||||
The version of `malloc` that comes with some systems is sometimes
|
|
||||||
astonishly slow. If `awk` seems slow, you might try fixing that.
|
|
||||||
More generally, turning on optimization can significantly improve
|
|
||||||
`awk`'s speed, perhaps by 1/3 for highest levels.
|
|
||||||
|
|
||||||
## A Note About Releases
|
## A Note About Releases
|
||||||
|
|
||||||
We don't usually do releases.
|
We don't usually do releases.
|
||||||
|
|
||||||
## A Note About Maintenance
|
## A Note About Maintenance
|
||||||
|
|
||||||
|
@ -122,5 +147,4 @@ is not at the top of our priority list.
|
||||||
|
|
||||||
#### Last Updated
|
#### Last Updated
|
||||||
|
|
||||||
Sun 23 Jan 2022 03:48:01 PM EST
|
Sun Sep 3 09:26:43 EDT 2023
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.\" $OpenBSD: awk.1,v 1.64 2023/09/15 15:07:08 jsg Exp $
|
.\" $OpenBSD: awk.1,v 1.65 2023/09/17 14:49:44 millert Exp $
|
||||||
.\"
|
.\"
|
||||||
.\" Copyright (C) Lucent Technologies 1997
|
.\" Copyright (C) Lucent Technologies 1997
|
||||||
.\" All Rights Reserved
|
.\" All Rights Reserved
|
||||||
|
@ -22,7 +22,7 @@
|
||||||
.\" ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
.\" ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
||||||
.\" THIS SOFTWARE.
|
.\" THIS SOFTWARE.
|
||||||
.\"
|
.\"
|
||||||
.Dd $Mdocdate: September 15 2023 $
|
.Dd $Mdocdate: September 17 2023 $
|
||||||
.Dt AWK 1
|
.Dt AWK 1
|
||||||
.Os
|
.Os
|
||||||
.Sh NAME
|
.Sh NAME
|
||||||
|
@ -33,7 +33,7 @@
|
||||||
.Op Fl safe
|
.Op Fl safe
|
||||||
.Op Fl V
|
.Op Fl V
|
||||||
.Op Fl d Ns Op Ar n
|
.Op Fl d Ns Op Ar n
|
||||||
.Op Fl F Ar fs
|
.Op Fl F Ar fs | Fl -csv
|
||||||
.Op Fl v Ar var Ns = Ns Ar value
|
.Op Fl v Ar var Ns = Ns Ar value
|
||||||
.Op Ar prog | Fl f Ar progfile
|
.Op Ar prog | Fl f Ar progfile
|
||||||
.Ar
|
.Ar
|
||||||
|
@ -64,6 +64,14 @@ and is executed at the time it would have been opened if it were a filename.
|
||||||
.Pp
|
.Pp
|
||||||
The options are as follows:
|
The options are as follows:
|
||||||
.Bl -tag -width "-safe "
|
.Bl -tag -width "-safe "
|
||||||
|
.It Fl -csv
|
||||||
|
Process records using the (more or less) standard comma-separated values
|
||||||
|
.Pq CSV
|
||||||
|
format instead of the input field separator.
|
||||||
|
When the
|
||||||
|
.Fl -csv
|
||||||
|
option is specified, attempts to change the input field separator
|
||||||
|
or record separator are ignored.
|
||||||
.It Fl d Ns Op Ar n
|
.It Fl d Ns Op Ar n
|
||||||
Debug mode.
|
Debug mode.
|
||||||
Set debug level to
|
Set debug level to
|
||||||
|
@ -1058,4 +1066,5 @@ to it.
|
||||||
The scope rules for variables in functions are a botch;
|
The scope rules for variables in functions are a botch;
|
||||||
the syntax is worse.
|
the syntax is worse.
|
||||||
.Pp
|
.Pp
|
||||||
Only eight-bit character sets are handled correctly.
|
Input is expected to be UTF-8 encoded.
|
||||||
|
Other multibyte character sets are not handled.
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* $OpenBSD: awk.h,v 1.28 2022/09/01 15:21:28 millert Exp $ */
|
/* $OpenBSD: awk.h,v 1.29 2023/09/17 14:49:44 millert Exp $ */
|
||||||
/****************************************************************
|
/****************************************************************
|
||||||
Copyright (C) Lucent Technologies 1997
|
Copyright (C) Lucent Technologies 1997
|
||||||
All Rights Reserved
|
All Rights Reserved
|
||||||
|
@ -80,6 +80,8 @@ extern char **SUBSEP;
|
||||||
extern Awkfloat *RSTART;
|
extern Awkfloat *RSTART;
|
||||||
extern Awkfloat *RLENGTH;
|
extern Awkfloat *RLENGTH;
|
||||||
|
|
||||||
|
extern bool CSV; /* true for csv input */
|
||||||
|
|
||||||
extern char *record; /* points to $0 */
|
extern char *record; /* points to $0 */
|
||||||
extern int lineno; /* line number in awk program */
|
extern int lineno; /* line number in awk program */
|
||||||
extern int errorflag; /* 1 if error has occurred */
|
extern int errorflag; /* 1 if error has occurred */
|
||||||
|
@ -236,7 +238,8 @@ extern int pairstack[], paircnt;
|
||||||
|
|
||||||
/* structures used by regular expression matching machinery, mostly b.c: */
|
/* structures used by regular expression matching machinery, mostly b.c: */
|
||||||
|
|
||||||
#define NCHARS (256+3) /* 256 handles 8-bit chars; 128 does 7-bit */
|
#define NCHARS (1256+3) /* 256 handles 8-bit chars; 128 does 7-bit */
|
||||||
|
/* BUG: some overflows (caught) if we use 256 */
|
||||||
/* watch out in match(), etc. */
|
/* watch out in match(), etc. */
|
||||||
#define HAT (NCHARS+2) /* matches ^ in regular expr */
|
#define HAT (NCHARS+2) /* matches ^ in regular expr */
|
||||||
#define NSTATES 32
|
#define NSTATES 32
|
||||||
|
@ -247,12 +250,19 @@ typedef struct rrow {
|
||||||
int i;
|
int i;
|
||||||
Node *np;
|
Node *np;
|
||||||
uschar *up;
|
uschar *up;
|
||||||
|
int *rp; /* rune representation of char class */
|
||||||
} lval; /* because Al stores a pointer in it! */
|
} lval; /* because Al stores a pointer in it! */
|
||||||
int *lfollow;
|
int *lfollow;
|
||||||
} rrow;
|
} rrow;
|
||||||
|
|
||||||
|
typedef struct gtt { /* gototab entry */
|
||||||
|
unsigned int ch;
|
||||||
|
unsigned int state;
|
||||||
|
} gtt;
|
||||||
|
|
||||||
typedef struct fa {
|
typedef struct fa {
|
||||||
unsigned int **gototab;
|
gtt **gototab;
|
||||||
|
int gototab_len;
|
||||||
uschar *out;
|
uschar *out;
|
||||||
uschar *restr;
|
uschar *restr;
|
||||||
int **posns;
|
int **posns;
|
||||||
|
|
269
usr.bin/awk/b.c
269
usr.bin/awk/b.c
|
@ -1,4 +1,4 @@
|
||||||
/* $OpenBSD: b.c,v 1.37 2021/07/08 21:26:39 millert Exp $ */
|
/* $OpenBSD: b.c,v 1.38 2023/09/17 14:49:44 millert Exp $ */
|
||||||
/****************************************************************
|
/****************************************************************
|
||||||
Copyright (C) Lucent Technologies 1997
|
Copyright (C) Lucent Technologies 1997
|
||||||
All Rights Reserved
|
All Rights Reserved
|
||||||
|
@ -81,6 +81,41 @@ int patlen;
|
||||||
fa *fatab[NFA];
|
fa *fatab[NFA];
|
||||||
int nfatab = 0; /* entries in fatab */
|
int nfatab = 0; /* entries in fatab */
|
||||||
|
|
||||||
|
|
||||||
|
/* utf-8 mechanism:
|
||||||
|
|
||||||
|
For most of Awk, utf-8 strings just "work", since they look like
|
||||||
|
null-terminated sequences of 8-bit bytes.
|
||||||
|
|
||||||
|
Functions like length(), index(), and substr() have to operate
|
||||||
|
in units of utf-8 characters. The u8_* functions in run.c
|
||||||
|
handle this.
|
||||||
|
|
||||||
|
Regular expressions are more complicated, since the basic
|
||||||
|
mechanism of the goto table used 8-bit byte indices into the
|
||||||
|
gototab entries to compute the next state. Unicode is a lot
|
||||||
|
bigger, so the gototab entries are now structs with a character
|
||||||
|
and a next state, and there is a linear search of the characters
|
||||||
|
to find the state. (Yes, this is slower, by a significant
|
||||||
|
amount. Tough.)
|
||||||
|
|
||||||
|
Throughout the RE mechanism in b.c, utf-8 characters are
|
||||||
|
converted to their utf-32 value. This mostly shows up in
|
||||||
|
cclenter, which expands character class ranges like a-z and now
|
||||||
|
alpha-omega. The size of a gototab array is still about 256.
|
||||||
|
This should be dynamic, but for now things work ok for a single
|
||||||
|
code page of Unicode, which is the most likely case.
|
||||||
|
|
||||||
|
The code changes are localized in run.c and b.c. I have added a
|
||||||
|
handful of functions to somewhat better hide the implementation,
|
||||||
|
but a lot more could be done.
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int get_gototab(fa*, int, int);
|
||||||
|
static int set_gototab(fa*, int, int, int);
|
||||||
|
extern int u8_rune(int *, const uschar *);
|
||||||
|
|
||||||
static int *
|
static int *
|
||||||
intalloc(size_t n, const char *f)
|
intalloc(size_t n, const char *f)
|
||||||
{
|
{
|
||||||
|
@ -113,7 +148,7 @@ resizesetvec(const char *f)
|
||||||
static void
|
static void
|
||||||
resize_state(fa *f, int state)
|
resize_state(fa *f, int state)
|
||||||
{
|
{
|
||||||
unsigned int **p;
|
gtt **p;
|
||||||
uschar *p2;
|
uschar *p2;
|
||||||
int **p3;
|
int **p3;
|
||||||
int i, new_count;
|
int i, new_count;
|
||||||
|
@ -123,7 +158,7 @@ resize_state(fa *f, int state)
|
||||||
|
|
||||||
new_count = state + 10; /* needs to be tuned */
|
new_count = state + 10; /* needs to be tuned */
|
||||||
|
|
||||||
p = (unsigned int **) reallocarray(f->gototab, new_count, sizeof(f->gototab[0]));
|
p = (gtt **) reallocarray(f->gototab, new_count, sizeof(f->gototab[0]));
|
||||||
if (p == NULL)
|
if (p == NULL)
|
||||||
goto out;
|
goto out;
|
||||||
f->gototab = p;
|
f->gototab = p;
|
||||||
|
@ -139,12 +174,13 @@ resize_state(fa *f, int state)
|
||||||
f->posns = p3;
|
f->posns = p3;
|
||||||
|
|
||||||
for (i = f->state_count; i < new_count; ++i) {
|
for (i = f->state_count; i < new_count; ++i) {
|
||||||
f->gototab[i] = (unsigned int *) calloc(NCHARS, sizeof(**f->gototab));
|
f->gototab[i] = (gtt *) calloc(NCHARS, sizeof(**f->gototab));
|
||||||
if (f->gototab[i] == NULL)
|
if (f->gototab[i] == NULL)
|
||||||
goto out;
|
goto out;
|
||||||
f->out[i] = 0;
|
f->out[i] = 0;
|
||||||
f->posns[i] = NULL;
|
f->posns[i] = NULL;
|
||||||
}
|
}
|
||||||
|
f->gototab_len = NCHARS; /* should be variable, growable */
|
||||||
f->state_count = new_count;
|
f->state_count = new_count;
|
||||||
return;
|
return;
|
||||||
out:
|
out:
|
||||||
|
@ -239,7 +275,7 @@ int makeinit(fa *f, bool anchor)
|
||||||
if ((f->posns[2])[1] == f->accept)
|
if ((f->posns[2])[1] == f->accept)
|
||||||
f->out[2] = 1;
|
f->out[2] = 1;
|
||||||
for (i = 0; i < NCHARS; i++)
|
for (i = 0; i < NCHARS; i++)
|
||||||
f->gototab[2][i] = 0;
|
set_gototab(f, 2, 0, 0); /* f->gototab[2][i] = 0; */
|
||||||
f->curstat = cgoto(f, 2, HAT);
|
f->curstat = cgoto(f, 2, HAT);
|
||||||
if (anchor) {
|
if (anchor) {
|
||||||
*f->posns[2] = k-1; /* leave out position 0 */
|
*f->posns[2] = k-1; /* leave out position 0 */
|
||||||
|
@ -308,13 +344,13 @@ void freetr(Node *p) /* free parse tree */
|
||||||
/* in the parsing of regular expressions, metacharacters like . have */
|
/* in the parsing of regular expressions, metacharacters like . have */
|
||||||
/* to be seen literally; \056 is not a metacharacter. */
|
/* to be seen literally; \056 is not a metacharacter. */
|
||||||
|
|
||||||
int hexstr(const uschar **pp) /* find and eval hex string at pp, return new p */
|
int hexstr(const uschar **pp, int max) /* find and eval hex string at pp, return new p */
|
||||||
{ /* only pick up one 8-bit byte (2 chars) */
|
{ /* only pick up one 8-bit byte (2 chars) */
|
||||||
const uschar *p;
|
const uschar *p;
|
||||||
int n = 0;
|
int n = 0;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0, p = *pp; i < 2 && isxdigit(*p); i++, p++) {
|
for (i = 0, p = *pp; i < max && isxdigit(*p); i++, p++) {
|
||||||
if (isdigit(*p))
|
if (isdigit(*p))
|
||||||
n = 16 * n + *p - '0';
|
n = 16 * n + *p - '0';
|
||||||
else if (*p >= 'a' && *p <= 'f')
|
else if (*p >= 'a' && *p <= 'f')
|
||||||
|
@ -334,24 +370,28 @@ int quoted(const uschar **pp) /* pick up next thing after a \\ */
|
||||||
const uschar *p = *pp;
|
const uschar *p = *pp;
|
||||||
int c;
|
int c;
|
||||||
|
|
||||||
if ((c = *p++) == 't')
|
/* BUG: should advance by utf-8 char even if makes no sense */
|
||||||
|
|
||||||
|
if ((c = *p++) == 't') {
|
||||||
c = '\t';
|
c = '\t';
|
||||||
else if (c == 'n')
|
} else if (c == 'n') {
|
||||||
c = '\n';
|
c = '\n';
|
||||||
else if (c == 'f')
|
} else if (c == 'f') {
|
||||||
c = '\f';
|
c = '\f';
|
||||||
else if (c == 'r')
|
} else if (c == 'r') {
|
||||||
c = '\r';
|
c = '\r';
|
||||||
else if (c == 'b')
|
} else if (c == 'b') {
|
||||||
c = '\b';
|
c = '\b';
|
||||||
else if (c == 'v')
|
} else if (c == 'v') {
|
||||||
c = '\v';
|
c = '\v';
|
||||||
else if (c == 'a')
|
} else if (c == 'a') {
|
||||||
c = '\a';
|
c = '\a';
|
||||||
else if (c == '\\')
|
} else if (c == '\\') {
|
||||||
c = '\\';
|
c = '\\';
|
||||||
else if (c == 'x') { /* hexadecimal goo follows */
|
} else if (c == 'x') { /* 2 hex digits follow */
|
||||||
c = hexstr(&p); /* this adds a null if number is invalid */
|
c = hexstr(&p, 2); /* this adds a null if number is invalid */
|
||||||
|
} else if (c == 'u') { /* unicode char number up to 8 hex digits */
|
||||||
|
c = hexstr(&p, 8);
|
||||||
} else if (isoctdigit(c)) { /* \d \dd \ddd */
|
} else if (isoctdigit(c)) { /* \d \dd \ddd */
|
||||||
int n = c - '0';
|
int n = c - '0';
|
||||||
if (isoctdigit(*p)) {
|
if (isoctdigit(*p)) {
|
||||||
|
@ -366,50 +406,67 @@ int quoted(const uschar **pp) /* pick up next thing after a \\ */
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *cclenter(const char *argp) /* add a character class */
|
int *cclenter(const char *argp) /* add a character class */
|
||||||
{
|
{
|
||||||
int i, c, c2;
|
int i, c, c2;
|
||||||
const uschar *op, *p = (const uschar *) argp;
|
int n;
|
||||||
uschar *bp;
|
const uschar *p = (const uschar *) argp;
|
||||||
static uschar *buf = NULL;
|
int *bp, *retp;
|
||||||
|
static int *buf = NULL;
|
||||||
static int bufsz = 100;
|
static int bufsz = 100;
|
||||||
|
|
||||||
op = p;
|
if (buf == NULL && (buf = (int *) calloc(bufsz, sizeof(int))) == NULL)
|
||||||
if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL)
|
|
||||||
FATAL("out of space for character class [%.10s...] 1", p);
|
FATAL("out of space for character class [%.10s...] 1", p);
|
||||||
bp = buf;
|
bp = buf;
|
||||||
for (i = 0; (c = *p++) != 0; ) {
|
for (i = 0; *p != 0; ) {
|
||||||
|
n = u8_rune(&c, p);
|
||||||
|
p += n;
|
||||||
if (c == '\\') {
|
if (c == '\\') {
|
||||||
c = quoted(&p);
|
c = quoted(&p);
|
||||||
} else if (c == '-' && i > 0 && bp[-1] != 0) {
|
} else if (c == '-' && i > 0 && bp[-1] != 0) {
|
||||||
if (*p != 0) {
|
if (*p != 0) {
|
||||||
c = bp[-1];
|
c = bp[-1];
|
||||||
c2 = *p++;
|
/* c2 = *p++; */
|
||||||
|
n = u8_rune(&c2, p);
|
||||||
|
p += n;
|
||||||
if (c2 == '\\')
|
if (c2 == '\\')
|
||||||
c2 = quoted(&p);
|
c2 = quoted(&p); /* BUG: sets p, has to be u8 size */
|
||||||
if (c > c2) { /* empty; ignore */
|
if (c > c2) { /* empty; ignore */
|
||||||
bp--;
|
bp--;
|
||||||
i--;
|
i--;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
while (c < c2) {
|
while (c < c2) {
|
||||||
if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter1"))
|
if (i >= bufsz) {
|
||||||
FATAL("out of space for character class [%.10s...] 2", p);
|
buf = (int *) reallocarray(buf, bufsz, sizeof(int) * 2);
|
||||||
|
if (buf == NULL)
|
||||||
|
FATAL("out of space for character class [%.10s...] 2", p);
|
||||||
|
bufsz *= 2;
|
||||||
|
bp = buf + i;
|
||||||
|
}
|
||||||
*bp++ = ++c;
|
*bp++ = ++c;
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!adjbuf((char **) &buf, &bufsz, bp-buf+2, 100, (char **) &bp, "cclenter2"))
|
if (i >= bufsz) {
|
||||||
FATAL("out of space for character class [%.10s...] 3", p);
|
buf = (int *) reallocarray(buf, bufsz, sizeof(int) * 2);
|
||||||
|
if (buf == NULL)
|
||||||
|
FATAL("out of space for character class [%.10s...] 2", p);
|
||||||
|
bufsz *= 2;
|
||||||
|
bp = buf + i;
|
||||||
|
}
|
||||||
*bp++ = c;
|
*bp++ = c;
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
*bp = 0;
|
*bp = 0;
|
||||||
DPRINTF("cclenter: in = |%s|, out = |%s|\n", op, buf);
|
/* DPRINTF("cclenter: in = |%s|, out = |%s|\n", op, buf); BUG: can't print array of int */
|
||||||
xfree(op);
|
/* xfree(op); BUG: what are we freeing here? */
|
||||||
return (char *) tostring((char *) buf);
|
retp = (int *) calloc(bp-buf+1, sizeof(int));
|
||||||
|
for (i = 0; i < bp-buf+1; i++)
|
||||||
|
retp[i] = buf[i];
|
||||||
|
return retp;
|
||||||
}
|
}
|
||||||
|
|
||||||
void overflo(const char *s)
|
void overflo(const char *s)
|
||||||
|
@ -532,9 +589,9 @@ void follow(Node *v) /* collects leaves that can follow v into setvec */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int member(int c, const char *sarg) /* is c in s? */
|
int member(int c, int *sarg) /* is c in s? */
|
||||||
{
|
{
|
||||||
const uschar *s = (const uschar *) sarg;
|
int *s = (int *) sarg;
|
||||||
|
|
||||||
while (*s)
|
while (*s)
|
||||||
if (c == *s++)
|
if (c == *s++)
|
||||||
|
@ -542,11 +599,41 @@ int member(int c, const char *sarg) /* is c in s? */
|
||||||
return(0);
|
return(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int get_gototab(fa *f, int state, int ch) /* hide gototab implementation */
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < f->gototab_len; i++) {
|
||||||
|
if (f->gototab[state][i].ch == 0)
|
||||||
|
break;
|
||||||
|
if (f->gototab[state][i].ch == ch)
|
||||||
|
return f->gototab[state][i].state;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int set_gototab(fa *f, int state, int ch, int val) /* hide gototab implementation */
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < f->gototab_len; i++) {
|
||||||
|
if (f->gototab[state][i].ch == 0 || f->gototab[state][i].ch == ch) {
|
||||||
|
f->gototab[state][i].ch = ch;
|
||||||
|
f->gototab[state][i].state = val;
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
overflo(__func__);
|
||||||
|
return val; /* not used anywhere at the moment */
|
||||||
|
}
|
||||||
|
|
||||||
int match(fa *f, const char *p0) /* shortest match ? */
|
int match(fa *f, const char *p0) /* shortest match ? */
|
||||||
{
|
{
|
||||||
int s, ns;
|
int s, ns;
|
||||||
|
int n;
|
||||||
|
int rune;
|
||||||
const uschar *p = (const uschar *) p0;
|
const uschar *p = (const uschar *) p0;
|
||||||
|
|
||||||
|
/* return pmatch(f, p0); does it matter whether longest or shortest? */
|
||||||
|
|
||||||
s = f->initstat;
|
s = f->initstat;
|
||||||
assert (s < f->state_count);
|
assert (s < f->state_count);
|
||||||
|
|
||||||
|
@ -554,19 +641,25 @@ int match(fa *f, const char *p0) /* shortest match ? */
|
||||||
return(1);
|
return(1);
|
||||||
do {
|
do {
|
||||||
/* assert(*p < NCHARS); */
|
/* assert(*p < NCHARS); */
|
||||||
if ((ns = f->gototab[s][*p]) != 0)
|
n = u8_rune(&rune, p);
|
||||||
|
if ((ns = get_gototab(f, s, rune)) != 0)
|
||||||
s = ns;
|
s = ns;
|
||||||
else
|
else
|
||||||
s = cgoto(f, s, *p);
|
s = cgoto(f, s, rune);
|
||||||
if (f->out[s])
|
if (f->out[s])
|
||||||
return(1);
|
return(1);
|
||||||
} while (*p++ != 0);
|
if (*p == 0)
|
||||||
|
break;
|
||||||
|
p += n;
|
||||||
|
} while (1); /* was *p++ != 0 */
|
||||||
return(0);
|
return(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
int pmatch(fa *f, const char *p0) /* longest match, for sub */
|
int pmatch(fa *f, const char *p0) /* longest match, for sub */
|
||||||
{
|
{
|
||||||
int s, ns;
|
int s, ns;
|
||||||
|
int n;
|
||||||
|
int rune;
|
||||||
const uschar *p = (const uschar *) p0;
|
const uschar *p = (const uschar *) p0;
|
||||||
const uschar *q;
|
const uschar *q;
|
||||||
|
|
||||||
|
@ -581,10 +674,11 @@ int pmatch(fa *f, const char *p0) /* longest match, for sub */
|
||||||
if (f->out[s]) /* final state */
|
if (f->out[s]) /* final state */
|
||||||
patlen = q-p;
|
patlen = q-p;
|
||||||
/* assert(*q < NCHARS); */
|
/* assert(*q < NCHARS); */
|
||||||
if ((ns = f->gototab[s][*q]) != 0)
|
n = u8_rune(&rune, q);
|
||||||
|
if ((ns = get_gototab(f, s, rune)) != 0)
|
||||||
s = ns;
|
s = ns;
|
||||||
else
|
else
|
||||||
s = cgoto(f, s, *q);
|
s = cgoto(f, s, rune);
|
||||||
|
|
||||||
assert(s < f->state_count);
|
assert(s < f->state_count);
|
||||||
|
|
||||||
|
@ -596,7 +690,11 @@ int pmatch(fa *f, const char *p0) /* longest match, for sub */
|
||||||
else
|
else
|
||||||
goto nextin; /* no match */
|
goto nextin; /* no match */
|
||||||
}
|
}
|
||||||
} while (*q++ != 0);
|
if (*q == 0)
|
||||||
|
break;
|
||||||
|
q += n;
|
||||||
|
} while (1);
|
||||||
|
q++; /* was *q++ */
|
||||||
if (f->out[s])
|
if (f->out[s])
|
||||||
patlen = q-p-1; /* don't count $ */
|
patlen = q-p-1; /* don't count $ */
|
||||||
if (patlen >= 0) {
|
if (patlen >= 0) {
|
||||||
|
@ -605,13 +703,19 @@ int pmatch(fa *f, const char *p0) /* longest match, for sub */
|
||||||
}
|
}
|
||||||
nextin:
|
nextin:
|
||||||
s = 2;
|
s = 2;
|
||||||
} while (*p++);
|
if (*p == 0)
|
||||||
|
break;
|
||||||
|
n = u8_rune(&rune, p);
|
||||||
|
p += n;
|
||||||
|
} while (1); /* was *p++ */
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
int nematch(fa *f, const char *p0) /* non-empty match, for sub */
|
int nematch(fa *f, const char *p0) /* non-empty match, for sub */
|
||||||
{
|
{
|
||||||
int s, ns;
|
int s, ns;
|
||||||
|
int n;
|
||||||
|
int rune;
|
||||||
const uschar *p = (const uschar *) p0;
|
const uschar *p = (const uschar *) p0;
|
||||||
const uschar *q;
|
const uschar *q;
|
||||||
|
|
||||||
|
@ -626,10 +730,11 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */
|
||||||
if (f->out[s]) /* final state */
|
if (f->out[s]) /* final state */
|
||||||
patlen = q-p;
|
patlen = q-p;
|
||||||
/* assert(*q < NCHARS); */
|
/* assert(*q < NCHARS); */
|
||||||
if ((ns = f->gototab[s][*q]) != 0)
|
n = u8_rune(&rune, q);
|
||||||
|
if ((ns = get_gototab(f, s, rune)) != 0)
|
||||||
s = ns;
|
s = ns;
|
||||||
else
|
else
|
||||||
s = cgoto(f, s, *q);
|
s = cgoto(f, s, rune);
|
||||||
if (s == 1) { /* no transition */
|
if (s == 1) { /* no transition */
|
||||||
if (patlen > 0) {
|
if (patlen > 0) {
|
||||||
patbeg = (const char *) p;
|
patbeg = (const char *) p;
|
||||||
|
@ -637,7 +742,11 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */
|
||||||
} else
|
} else
|
||||||
goto nnextin; /* no nonempty match */
|
goto nnextin; /* no nonempty match */
|
||||||
}
|
}
|
||||||
} while (*q++ != 0);
|
if (*q == 0)
|
||||||
|
break;
|
||||||
|
q += n;
|
||||||
|
} while (1);
|
||||||
|
q++;
|
||||||
if (f->out[s])
|
if (f->out[s])
|
||||||
patlen = q-p-1; /* don't count $ */
|
patlen = q-p-1; /* don't count $ */
|
||||||
if (patlen > 0 ) {
|
if (patlen > 0 ) {
|
||||||
|
@ -651,6 +760,35 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int getrune(FILE *fp, char **pbuf, int *pbufsize, int quantum,
|
||||||
|
int *curpos, int *lastpos)
|
||||||
|
{
|
||||||
|
int c = 0;
|
||||||
|
char *buf = *pbuf;
|
||||||
|
static const int max_bytes = 4; // max multiple bytes in UTF-8 is 4
|
||||||
|
int i, rune;
|
||||||
|
uschar private_buf[max_bytes + 1];
|
||||||
|
|
||||||
|
for (i = 0; i <= max_bytes; i++) {
|
||||||
|
if (++*curpos == *lastpos) {
|
||||||
|
if (*lastpos == *pbufsize)
|
||||||
|
if (!adjbuf((char **) pbuf, pbufsize, *pbufsize+1, quantum, 0, "getrune"))
|
||||||
|
FATAL("stream '%.30s...' too long", buf);
|
||||||
|
buf[(*lastpos)++] = (c = getc(fp)) != EOF ? c : 0;
|
||||||
|
private_buf[i] = c;
|
||||||
|
}
|
||||||
|
if (c == 0 || c < 128 || (c >> 6) == 4) { // 10xxxxxx starts a new character
|
||||||
|
ungetc(c, fp);
|
||||||
|
private_buf[i] = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
u8_rune(& rune, private_buf);
|
||||||
|
|
||||||
|
return rune;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* NAME
|
* NAME
|
||||||
|
@ -672,6 +810,7 @@ bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
|
||||||
char *buf = *pbuf;
|
char *buf = *pbuf;
|
||||||
int bufsize = *pbufsize;
|
int bufsize = *pbufsize;
|
||||||
int c, i, j, k, ns, s;
|
int c, i, j, k, ns, s;
|
||||||
|
int rune;
|
||||||
|
|
||||||
s = pfa->initstat;
|
s = pfa->initstat;
|
||||||
patlen = 0;
|
patlen = 0;
|
||||||
|
@ -695,12 +834,19 @@ bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum)
|
||||||
buf[k++] = (c = getc(f)) != EOF ? c : 0;
|
buf[k++] = (c = getc(f)) != EOF ? c : 0;
|
||||||
}
|
}
|
||||||
c = (uschar)buf[j];
|
c = (uschar)buf[j];
|
||||||
/* assert(c < NCHARS); */
|
if (c < 128)
|
||||||
|
rune = c;
|
||||||
|
else {
|
||||||
|
j--;
|
||||||
|
k--;
|
||||||
|
ungetc(c, f);
|
||||||
|
rune = getrune(f, &buf, &bufsize, quantum, &j, &k);
|
||||||
|
}
|
||||||
|
|
||||||
if ((ns = pfa->gototab[s][c]) != 0)
|
if ((ns = get_gototab(pfa, s, rune)) != 0)
|
||||||
s = ns;
|
s = ns;
|
||||||
else
|
else
|
||||||
s = cgoto(pfa, s, c);
|
s = cgoto(pfa, s, rune);
|
||||||
|
|
||||||
if (pfa->out[s]) { /* final state */
|
if (pfa->out[s]) { /* final state */
|
||||||
patlen = j - i + 1;
|
patlen = j - i + 1;
|
||||||
|
@ -1026,6 +1172,8 @@ static int repeat(const uschar *reptok, int reptoklen, const uschar *atom,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern int u8_rune(int *, const uschar *); /* run.c; should be in header file */
|
||||||
|
|
||||||
int relex(void) /* lexical analyzer for reparse */
|
int relex(void) /* lexical analyzer for reparse */
|
||||||
{
|
{
|
||||||
int c, n;
|
int c, n;
|
||||||
|
@ -1043,6 +1191,12 @@ int relex(void) /* lexical analyzer for reparse */
|
||||||
rescan:
|
rescan:
|
||||||
starttok = prestr;
|
starttok = prestr;
|
||||||
|
|
||||||
|
if ((n = u8_rune(&rlxval, prestr)) > 1) {
|
||||||
|
prestr += n;
|
||||||
|
starttok = prestr;
|
||||||
|
return CHAR;
|
||||||
|
}
|
||||||
|
|
||||||
switch (c = *prestr++) {
|
switch (c = *prestr++) {
|
||||||
case '|': return OR;
|
case '|': return OR;
|
||||||
case '*': return STAR;
|
case '*': return STAR;
|
||||||
|
@ -1080,10 +1234,15 @@ rescan:
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
cflag = 0;
|
cflag = 0;
|
||||||
n = 2 * strlen((const char *) prestr)+1;
|
n = 5 * strlen((const char *) prestr)+1; /* BUG: was 2. what value? */
|
||||||
if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
|
if (!adjbuf((char **) &buf, &bufsz, n, n, (char **) &bp, "relex1"))
|
||||||
FATAL("out of space for reg expr %.10s...", lastre);
|
FATAL("out of space for reg expr %.10s...", lastre);
|
||||||
for (; ; ) {
|
for (; ; ) {
|
||||||
|
if ((n = u8_rune(&rlxval, prestr)) > 1) {
|
||||||
|
for (i = 0; i < n; i++)
|
||||||
|
*bp++ = *prestr++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if ((c = *prestr++) == '\\') {
|
if ((c = *prestr++) == '\\') {
|
||||||
*bp++ = '\\';
|
*bp++ = '\\';
|
||||||
if ((c = *prestr++) == '\0')
|
if ((c = *prestr++) == '\0')
|
||||||
|
@ -1250,7 +1409,7 @@ int cgoto(fa *f, int s, int c)
|
||||||
int *p, *q;
|
int *p, *q;
|
||||||
int i, j, k;
|
int i, j, k;
|
||||||
|
|
||||||
assert(c == HAT || c < NCHARS);
|
/* assert(c == HAT || c < NCHARS); BUG: seg fault if disable test */
|
||||||
while (f->accept >= maxsetvec) { /* guessing here! */
|
while (f->accept >= maxsetvec) { /* guessing here! */
|
||||||
resizesetvec(__func__);
|
resizesetvec(__func__);
|
||||||
}
|
}
|
||||||
|
@ -1266,8 +1425,8 @@ int cgoto(fa *f, int s, int c)
|
||||||
|| (k == DOT && c != 0 && c != HAT)
|
|| (k == DOT && c != 0 && c != HAT)
|
||||||
|| (k == ALL && c != 0)
|
|| (k == ALL && c != 0)
|
||||||
|| (k == EMPTYRE && c != 0)
|
|| (k == EMPTYRE && c != 0)
|
||||||
|| (k == CCL && member(c, (char *) f->re[p[i]].lval.up))
|
|| (k == CCL && member(c, (int *) f->re[p[i]].lval.rp))
|
||||||
|| (k == NCCL && !member(c, (char *) f->re[p[i]].lval.up) && c != 0 && c != HAT)) {
|
|| (k == NCCL && !member(c, (int *) f->re[p[i]].lval.rp) && c != 0 && c != HAT)) {
|
||||||
q = f->re[p[i]].lfollow;
|
q = f->re[p[i]].lfollow;
|
||||||
for (j = 1; j <= *q; j++) {
|
for (j = 1; j <= *q; j++) {
|
||||||
if (q[j] >= maxsetvec) {
|
if (q[j] >= maxsetvec) {
|
||||||
|
@ -1299,7 +1458,7 @@ int cgoto(fa *f, int s, int c)
|
||||||
goto different;
|
goto different;
|
||||||
/* setvec is state i */
|
/* setvec is state i */
|
||||||
if (c != HAT)
|
if (c != HAT)
|
||||||
f->gototab[s][c] = i;
|
set_gototab(f, s, c, i);
|
||||||
return i;
|
return i;
|
||||||
different:;
|
different:;
|
||||||
}
|
}
|
||||||
|
@ -1308,13 +1467,13 @@ int cgoto(fa *f, int s, int c)
|
||||||
++(f->curstat);
|
++(f->curstat);
|
||||||
resize_state(f, f->curstat);
|
resize_state(f, f->curstat);
|
||||||
for (i = 0; i < NCHARS; i++)
|
for (i = 0; i < NCHARS; i++)
|
||||||
f->gototab[f->curstat][i] = 0;
|
set_gototab(f, f->curstat, 0, 0);
|
||||||
xfree(f->posns[f->curstat]);
|
xfree(f->posns[f->curstat]);
|
||||||
p = intalloc(setcnt + 1, __func__);
|
p = intalloc(setcnt + 1, __func__);
|
||||||
|
|
||||||
f->posns[f->curstat] = p;
|
f->posns[f->curstat] = p;
|
||||||
if (c != HAT)
|
if (c != HAT)
|
||||||
f->gototab[s][c] = f->curstat;
|
set_gototab(f, s, c, f->curstat);
|
||||||
for (i = 0; i <= setcnt; i++)
|
for (i = 0; i <= setcnt; i++)
|
||||||
p[i] = tmpset[i];
|
p[i] = tmpset[i];
|
||||||
if (setvec[f->accept])
|
if (setvec[f->accept])
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* $OpenBSD: lex.c,v 1.30 2023/09/10 14:59:00 millert Exp $ */
|
/* $OpenBSD: lex.c,v 1.31 2023/09/17 14:49:44 millert Exp $ */
|
||||||
/****************************************************************
|
/****************************************************************
|
||||||
Copyright (C) Lucent Technologies 1997
|
Copyright (C) Lucent Technologies 1997
|
||||||
All Rights Reserved
|
All Rights Reserved
|
||||||
|
@ -379,6 +379,8 @@ int yylex(void)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern int runetochar(char *str, int c);
|
||||||
|
|
||||||
int string(void)
|
int string(void)
|
||||||
{
|
{
|
||||||
int c, n;
|
int c, n;
|
||||||
|
@ -426,7 +428,7 @@ int string(void)
|
||||||
*bp++ = n;
|
*bp++ = n;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'x': /* hex \x0-9a-fA-F + */
|
case 'x': /* hex \x0-9a-fA-F (exactly two) */
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
|
@ -452,6 +454,27 @@ int string(void)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case 'u': /* utf \u0-9a-fA-F (1..8) */
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
n = 0;
|
||||||
|
for (i = 0; i < 8; i++) {
|
||||||
|
c = input();
|
||||||
|
if (!isxdigit(c) || c == 0)
|
||||||
|
break;
|
||||||
|
c = tolower(c);
|
||||||
|
n *= 16;
|
||||||
|
if (isdigit(c))
|
||||||
|
n += (c - '0');
|
||||||
|
else
|
||||||
|
n += 10 + (c - 'a');
|
||||||
|
}
|
||||||
|
unput(c);
|
||||||
|
bp += runetochar(bp, n);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
*bp++ = c;
|
*bp++ = c;
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* $OpenBSD: lib.c,v 1.50 2023/09/10 14:59:00 millert Exp $ */
|
/* $OpenBSD: lib.c,v 1.51 2023/09/17 14:49:44 millert Exp $ */
|
||||||
/****************************************************************
|
/****************************************************************
|
||||||
Copyright (C) Lucent Technologies 1997
|
Copyright (C) Lucent Technologies 1997
|
||||||
All Rights Reserved
|
All Rights Reserved
|
||||||
|
@ -34,6 +34,8 @@ THIS SOFTWARE.
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include "awk.h"
|
#include "awk.h"
|
||||||
|
|
||||||
|
extern int u8_nextlen(const char *s);
|
||||||
|
|
||||||
char EMPTY[] = { '\0' };
|
char EMPTY[] = { '\0' };
|
||||||
FILE *infile = NULL;
|
FILE *infile = NULL;
|
||||||
bool innew; /* true = infile has not been read by readrec */
|
bool innew; /* true = infile has not been read by readrec */
|
||||||
|
@ -217,14 +219,19 @@ void nextfile(void)
|
||||||
argno++;
|
argno++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern int readcsvrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag);
|
||||||
|
|
||||||
int readrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* read one record into buf */
|
int readrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* read one record into buf */
|
||||||
{
|
{
|
||||||
int sep, c, isrec;
|
int sep, c, isrec; // POTENTIAL BUG? isrec is a macro in awk.h
|
||||||
char *rr, *buf = *pbuf;
|
char *rr = *pbuf, *buf = *pbuf;
|
||||||
int bufsize = *pbufsize;
|
int bufsize = *pbufsize;
|
||||||
char *rs = getsval(rsloc);
|
char *rs = getsval(rsloc);
|
||||||
|
|
||||||
if (*rs && rs[1]) {
|
if (CSV) {
|
||||||
|
c = readcsvrec(pbuf, pbufsize, inf, newflag);
|
||||||
|
isrec = (c == EOF && rr == buf) ? false : true;
|
||||||
|
} else if (*rs && rs[1]) {
|
||||||
bool found;
|
bool found;
|
||||||
|
|
||||||
fa *pfa = makedfa(rs, 1);
|
fa *pfa = makedfa(rs, 1);
|
||||||
|
@ -276,6 +283,51 @@ int readrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* read one rec
|
||||||
return isrec;
|
return isrec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*******************
|
||||||
|
* loose ends here:
|
||||||
|
* \r\n should become \n
|
||||||
|
* what about bare \r? Excel uses that for embedded newlines
|
||||||
|
* can't have "" in unquoted fields, according to RFC 4180
|
||||||
|
*/
|
||||||
|
|
||||||
|
int readcsvrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* csv can have \n's */
|
||||||
|
{ /* so read a complete record that might be multiple lines */
|
||||||
|
int sep, c;
|
||||||
|
char *rr = *pbuf, *buf = *pbuf;
|
||||||
|
int bufsize = *pbufsize;
|
||||||
|
bool in_quote = false;
|
||||||
|
|
||||||
|
sep = '\n'; /* the only separator; have to skip over \n embedded in "..." */
|
||||||
|
rr = buf;
|
||||||
|
while ((c = getc(inf)) != EOF) {
|
||||||
|
if (c == sep) {
|
||||||
|
if (! in_quote)
|
||||||
|
break;
|
||||||
|
if (rr > buf && rr[-1] == '\r') // remove \r if was \r\n
|
||||||
|
rr--;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rr-buf+1 > bufsize)
|
||||||
|
if (!adjbuf(&buf, &bufsize, 1+rr-buf,
|
||||||
|
recsize, &rr, "readcsvrec 1"))
|
||||||
|
FATAL("input record `%.30s...' too long", buf);
|
||||||
|
*rr++ = c;
|
||||||
|
if (c == '"')
|
||||||
|
in_quote = ! in_quote;
|
||||||
|
}
|
||||||
|
if (c == '\n' && rr > buf && rr[-1] == '\r') // remove \r if was \r\n
|
||||||
|
rr--;
|
||||||
|
|
||||||
|
if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readcsvrec 4"))
|
||||||
|
FATAL("input record `%.30s...' too long", buf);
|
||||||
|
*rr = 0;
|
||||||
|
*pbuf = buf;
|
||||||
|
*pbufsize = bufsize;
|
||||||
|
DPRINTF("readcsvrec saw <%s>, returns %d\n", buf, c);
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
char *getargv(int n) /* get ARGV[n] */
|
char *getargv(int n) /* get ARGV[n] */
|
||||||
{
|
{
|
||||||
Cell *x;
|
Cell *x;
|
||||||
|
@ -297,6 +349,9 @@ void setclvar(char *s) /* set var=value from s */
|
||||||
Cell *q;
|
Cell *q;
|
||||||
double result;
|
double result;
|
||||||
|
|
||||||
|
/* commit f3d9187d4e0f02294fb1b0e31152070506314e67 broke T.argv test */
|
||||||
|
/* I don't understand why it was changed. */
|
||||||
|
|
||||||
for (p=s; *p != '='; p++)
|
for (p=s; *p != '='; p++)
|
||||||
;
|
;
|
||||||
e = p;
|
e = p;
|
||||||
|
@ -341,7 +396,7 @@ void fldbld(void) /* create fields from current record */
|
||||||
savefs();
|
savefs();
|
||||||
if (strlen(inputFS) > 1) { /* it's a regular expression */
|
if (strlen(inputFS) > 1) { /* it's a regular expression */
|
||||||
i = refldbld(r, inputFS);
|
i = refldbld(r, inputFS);
|
||||||
} else if ((sep = *inputFS) == ' ') { /* default whitespace */
|
} else if (!CSV && (sep = *inputFS) == ' ') { /* default whitespace */
|
||||||
for (i = 0; ; ) {
|
for (i = 0; ; ) {
|
||||||
while (*r == ' ' || *r == '\t' || *r == '\n')
|
while (*r == ' ' || *r == '\t' || *r == '\n')
|
||||||
r++;
|
r++;
|
||||||
|
@ -360,26 +415,58 @@ void fldbld(void) /* create fields from current record */
|
||||||
*fr++ = 0;
|
*fr++ = 0;
|
||||||
}
|
}
|
||||||
*fr = 0;
|
*fr = 0;
|
||||||
} else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */
|
} else if (CSV) { /* CSV processing. no error handling */
|
||||||
for (i = 0; *r != '\0'; r += n) {
|
if (*r != 0) {
|
||||||
char buf[MB_LEN_MAX + 1];
|
for (;;) {
|
||||||
|
i++;
|
||||||
|
if (i > nfields)
|
||||||
|
growfldtab(i);
|
||||||
|
if (freeable(fldtab[i]))
|
||||||
|
xfree(fldtab[i]->sval);
|
||||||
|
fldtab[i]->sval = fr;
|
||||||
|
fldtab[i]->tval = FLD | STR | DONTFREE;
|
||||||
|
if (*r == '"' ) { /* start of "..." */
|
||||||
|
for (r++ ; *r != '\0'; ) {
|
||||||
|
if (*r == '"' && r[1] != '\0' && r[1] == '"') {
|
||||||
|
r += 2; /* doubled quote */
|
||||||
|
*fr++ = '"';
|
||||||
|
} else if (*r == '"' && (r[1] == '\0' || r[1] == ',')) {
|
||||||
|
r++; /* skip over closing quote */
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
*fr++ = *r++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*fr++ = 0;
|
||||||
|
} else { /* unquoted field */
|
||||||
|
while (*r != ',' && *r != '\0')
|
||||||
|
*fr++ = *r++;
|
||||||
|
*fr++ = 0;
|
||||||
|
}
|
||||||
|
if (*r++ == 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*fr = 0;
|
||||||
|
} else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */
|
||||||
|
for (i = 0; *r != '\0'; ) {
|
||||||
|
char buf[10];
|
||||||
i++;
|
i++;
|
||||||
if (i > nfields)
|
if (i > nfields)
|
||||||
growfldtab(i);
|
growfldtab(i);
|
||||||
if (freeable(fldtab[i]))
|
if (freeable(fldtab[i]))
|
||||||
xfree(fldtab[i]->sval);
|
xfree(fldtab[i]->sval);
|
||||||
n = mblen(r, MB_LEN_MAX);
|
n = u8_nextlen(r);
|
||||||
if (n < 0)
|
for (j = 0; j < n; j++)
|
||||||
n = 1;
|
buf[j] = *r++;
|
||||||
memcpy(buf, r, n);
|
buf[j] = '\0';
|
||||||
buf[n] = '\0';
|
|
||||||
fldtab[i]->sval = tostring(buf);
|
fldtab[i]->sval = tostring(buf);
|
||||||
fldtab[i]->tval = FLD | STR;
|
fldtab[i]->tval = FLD | STR;
|
||||||
}
|
}
|
||||||
*fr = 0;
|
*fr = 0;
|
||||||
} else if (*r != 0) { /* if 0, it's a null field */
|
} else if (*r != 0) { /* if 0, it's a null field */
|
||||||
/* subtlecase : if length(FS) == 1 && length(RS > 0)
|
/* subtle case: if length(FS) == 1 && length(RS > 0)
|
||||||
* \n is NOT a field separator (cf awk book 61,84).
|
* \n is NOT a field separator (cf awk book 61,84).
|
||||||
* this variable is tested in the inner while loop.
|
* this variable is tested in the inner while loop.
|
||||||
*/
|
*/
|
||||||
|
@ -797,11 +884,11 @@ bool is_valid_number(const char *s, bool trailing_stuff_ok,
|
||||||
while (isspace((uschar)*s))
|
while (isspace((uschar)*s))
|
||||||
s++;
|
s++;
|
||||||
|
|
||||||
// no hex floating point, sorry
|
/* no hex floating point, sorry */
|
||||||
if (s[0] == '0' && tolower((uschar)s[1]) == 'x')
|
if (s[0] == '0' && tolower((uschar)s[1]) == 'x')
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// allow +nan, -nan, +inf, -inf, any other letter, no
|
/* allow +nan, -nan, +inf, -inf, any other letter, no */
|
||||||
if (s[0] == '+' || s[0] == '-') {
|
if (s[0] == '+' || s[0] == '-') {
|
||||||
is_nan = (strncasecmp(s+1, "nan", 3) == 0);
|
is_nan = (strncasecmp(s+1, "nan", 3) == 0);
|
||||||
is_inf = (strncasecmp(s+1, "inf", 3) == 0);
|
is_inf = (strncasecmp(s+1, "inf", 3) == 0);
|
||||||
|
@ -835,7 +922,7 @@ convert:
|
||||||
if (no_trailing != NULL)
|
if (no_trailing != NULL)
|
||||||
*no_trailing = (*ep == '\0');
|
*no_trailing = (*ep == '\0');
|
||||||
|
|
||||||
// return true if found the end, or trailing stuff is allowed
|
/* return true if found the end, or trailing stuff is allowed */
|
||||||
retval = *ep == '\0' || trailing_stuff_ok;
|
retval = *ep == '\0' || trailing_stuff_ok;
|
||||||
|
|
||||||
return retval;
|
return retval;
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* $OpenBSD: main.c,v 1.58 2023/09/10 14:59:00 millert Exp $ */
|
/* $OpenBSD: main.c,v 1.59 2023/09/17 14:49:44 millert Exp $ */
|
||||||
/****************************************************************
|
/****************************************************************
|
||||||
Copyright (C) Lucent Technologies 1997
|
Copyright (C) Lucent Technologies 1997
|
||||||
All Rights Reserved
|
All Rights Reserved
|
||||||
|
@ -23,7 +23,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
|
||||||
THIS SOFTWARE.
|
THIS SOFTWARE.
|
||||||
****************************************************************/
|
****************************************************************/
|
||||||
|
|
||||||
const char *version = "version 20230909";
|
const char *version = "version 20230913";
|
||||||
|
|
||||||
#define DEBUG
|
#define DEBUG
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
@ -52,6 +52,7 @@ static size_t maxpfile; /* max program filename */
|
||||||
static size_t npfile; /* number of filenames */
|
static size_t npfile; /* number of filenames */
|
||||||
static size_t curpfile; /* current filename */
|
static size_t curpfile; /* current filename */
|
||||||
|
|
||||||
|
bool CSV = false; /* true for csv input */
|
||||||
bool safe = false; /* true => "safe" mode */
|
bool safe = false; /* true => "safe" mode */
|
||||||
bool do_posix = false; /* true => POSIX mode */
|
bool do_posix = false; /* true => POSIX mode */
|
||||||
|
|
||||||
|
@ -170,6 +171,12 @@ int main(int argc, char *argv[])
|
||||||
argv++;
|
argv++;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
if (strcmp(argv[1], "--csv") == 0) { /* turn on csv input processing */
|
||||||
|
CSV = true;
|
||||||
|
argc--;
|
||||||
|
argv++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
switch (argv[1][1]) {
|
switch (argv[1][1]) {
|
||||||
case 's':
|
case 's':
|
||||||
if (strcmp(argv[1], "-safe") == 0)
|
if (strcmp(argv[1], "-safe") == 0)
|
||||||
|
@ -179,7 +186,7 @@ int main(int argc, char *argv[])
|
||||||
fn = getarg(&argc, &argv, "no program filename");
|
fn = getarg(&argc, &argv, "no program filename");
|
||||||
if (npfile >= maxpfile) {
|
if (npfile >= maxpfile) {
|
||||||
maxpfile += 20;
|
maxpfile += 20;
|
||||||
pfile = (char **) realloc(pfile, maxpfile * sizeof(*pfile));
|
pfile = (char **) reallocarray(pfile, maxpfile, sizeof(*pfile));
|
||||||
if (pfile == NULL)
|
if (pfile == NULL)
|
||||||
FATAL("error allocating space for -f options");
|
FATAL("error allocating space for -f options");
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* $OpenBSD: proto.h,v 1.21 2023/09/10 14:59:00 millert Exp $ */
|
/* $OpenBSD: proto.h,v 1.22 2023/09/17 14:49:44 millert Exp $ */
|
||||||
/****************************************************************
|
/****************************************************************
|
||||||
Copyright (C) Lucent Technologies 1997
|
Copyright (C) Lucent Technologies 1997
|
||||||
All Rights Reserved
|
All Rights Reserved
|
||||||
|
@ -44,14 +44,13 @@ extern fa *mkdfa(const char *, bool);
|
||||||
extern int makeinit(fa *, bool);
|
extern int makeinit(fa *, bool);
|
||||||
extern void penter(Node *);
|
extern void penter(Node *);
|
||||||
extern void freetr(Node *);
|
extern void freetr(Node *);
|
||||||
extern int hexstr(const uschar **);
|
|
||||||
extern int quoted(const uschar **);
|
extern int quoted(const uschar **);
|
||||||
extern char *cclenter(const char *);
|
extern int *cclenter(const char *);
|
||||||
extern noreturn void overflo(const char *);
|
extern noreturn void overflo(const char *);
|
||||||
extern void cfoll(fa *, Node *);
|
extern void cfoll(fa *, Node *);
|
||||||
extern int first(Node *);
|
extern int first(Node *);
|
||||||
extern void follow(Node *);
|
extern void follow(Node *);
|
||||||
extern int member(int, const char *);
|
extern int member(int, int *);
|
||||||
extern int match(fa *, const char *);
|
extern int match(fa *, const char *);
|
||||||
extern int pmatch(fa *, const char *);
|
extern int pmatch(fa *, const char *);
|
||||||
extern int nematch(fa *, const char *);
|
extern int nematch(fa *, const char *);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* $OpenBSD: run.c,v 1.74 2022/09/21 01:42:59 millert Exp $ */
|
/* $OpenBSD: run.c,v 1.75 2023/09/17 14:49:44 millert Exp $ */
|
||||||
/****************************************************************
|
/****************************************************************
|
||||||
Copyright (C) Lucent Technologies 1997
|
Copyright (C) Lucent Technologies 1997
|
||||||
All Rights Reserved
|
All Rights Reserved
|
||||||
|
@ -27,7 +27,6 @@ THIS SOFTWARE.
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <wchar.h>
|
|
||||||
#include <wctype.h>
|
#include <wctype.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <setjmp.h>
|
#include <setjmp.h>
|
||||||
|
@ -41,8 +40,10 @@ THIS SOFTWARE.
|
||||||
#include "awk.h"
|
#include "awk.h"
|
||||||
#include "awkgram.tab.h"
|
#include "awkgram.tab.h"
|
||||||
|
|
||||||
|
|
||||||
static void stdinit(void);
|
static void stdinit(void);
|
||||||
static void flush_all(void);
|
static void flush_all(void);
|
||||||
|
static char *wide_char_to_byte_str(int rune, size_t *outlen);
|
||||||
|
|
||||||
#if 1
|
#if 1
|
||||||
#define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
|
#define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
|
||||||
|
@ -580,11 +581,225 @@ Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* ======== utf-8 code ========== */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Awk strings can contain ascii, random 8-bit items (eg Latin-1),
|
||||||
|
* or utf-8. u8_isutf tests whether a string starts with a valid
|
||||||
|
* utf-8 sequence, and returns 0 if not (e.g., high bit set).
|
||||||
|
* u8_nextlen returns length of next valid sequence, which is
|
||||||
|
* 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf.
|
||||||
|
* u8_strlen returns length of string in valid utf-8 sequences
|
||||||
|
* and/or high-bit bytes. Conversion functions go between byte
|
||||||
|
* number and character number.
|
||||||
|
*
|
||||||
|
* In theory, this behaves the same as before for non-utf8 bytes.
|
||||||
|
*
|
||||||
|
* Limited checking! This is a potential security hole.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Is s the beginning of a valid utf-8 sequence?
 *
 * Returns 1 for an ascii byte (including the terminating NUL), 2..4 for
 * a multi-byte sequence whose lead byte is followed by the required
 * number of continuation bytes, and 0 for any other high-bit byte.
 *
 * Only the bit patterns are examined: overlong encodings, surrogates
 * and values above U+10FFFF are not rejected.
 */
int u8_isutf(const char *s)
{
	unsigned char c = s[0];

	if (c < 128)
		return 1;	/* what if it's 0? */

	/*
	 * No strlen() is needed to stay inside the string: a NUL byte
	 * never matches the 10xxxxxx continuation pattern, and && is
	 * evaluated left to right, so s[k+1] is only read once s[k] is
	 * known to be a (non-NUL) continuation byte.
	 */
	if ((c & 0xE0) == 0xC0 && (s[1] & 0xC0) == 0x80)
		return 2;	/* 110xxxxx 10xxxxxx */

	if ((c & 0xF0) == 0xE0 && (s[1] & 0xC0) == 0x80
	    && (s[2] & 0xC0) == 0x80)
		return 3;	/* 1110xxxx 10xxxxxx 10xxxxxx */

	if ((c & 0xF8) == 0xF0 && (s[1] & 0xC0) == 0x80
	    && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80)
		return 4;	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */

	return 0;
}
|
||||||
|
|
||||||
|
/*
 * Convert (prefix of) utf-8 string s to a utf-32 rune.
 *
 * Stores the decoded value in *rune and returns the number of bytes
 * consumed (1..4).  A high-bit byte that does not start a well-formed
 * sequence is returned as itself with length 1.
 *
 * No error checking beyond the bit patterns: overlong encodings and
 * out-of-range values are decoded as-is.  Watch out.
 */
int u8_rune(int *rune, const char *s)
{
	unsigned char c = s[0];

	if (c < 128) {
		*rune = c;
		return 1;
	}

	/*
	 * As a NUL byte is never a continuation byte, the short-circuit
	 * && chains below cannot read past the terminator; no strlen()
	 * of the rest of the string is required.
	 */
	if ((c & 0xE0) == 0xC0 && (s[1] & 0xC0) == 0x80) {
		/* 110xxxxx 10xxxxxx */
		*rune = ((c & 0x1F) << 6) | (s[1] & 0x3F);
		return 2;
	}

	if ((c & 0xF0) == 0xE0 && (s[1] & 0xC0) == 0x80
	    && (s[2] & 0xC0) == 0x80) {
		/* 1110xxxx 10xxxxxx 10xxxxxx */
		*rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
		return 3;
	}

	if ((c & 0xF8) == 0xF0 && (s[1] & 0xC0) == 0x80
	    && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
		/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		*rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12)
		    | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
		return 4;
	}

	/* doesn't look like utf: hand back the single byte */
	*rune = c;
	return 1;
}
|
||||||
|
|
||||||
|
/*
 * Length in bytes of the next item in s: 2..4 when s starts with a
 * sequence u8_isutf() accepts, otherwise 1 (ascii or a stray high-bit
 * byte).  Never returns 0, so callers can always advance by the result.
 */
int u8_nextlen(const char *s)
{
	int seqlen = u8_isutf(s);

	return seqlen != 0 ? seqlen : 1;
}
|
||||||
|
|
||||||
|
/*
 * Return the length of s counted in characters: every utf-8 sequence
 * recognized by u8_nextlen() counts as one, and so does every other
 * single byte (ascii or stray high-bit byte).
 */
int u8_strlen(const char *s)
{
	int totlen = 0;

	/*
	 * u8_nextlen() returns at least 1 and never a length that spans
	 * the terminating NUL, so walking until the NUL visits exactly the
	 * same positions as the former index loop did; the old "i > n"
	 * sanity check could never fire because the loop condition already
	 * guaranteed i < n.
	 */
	while (*s != '\0') {
		s += u8_nextlen(s);
		totlen++;
	}
	return totlen;
}
|
||||||
|
|
||||||
|
/*
 * Convert a utf-8 character count in s to the corresponding byte offset,
 * i.e. return the number of bytes occupied by the first charnum
 * characters of s.
 *
 * If s holds fewer than charnum characters the result is clamped to the
 * offset of the terminating NUL.  Without the clamp the loop stepped one
 * byte per remaining character beyond the end of the string (u8_nextlen()
 * reports length 1 at the NUL), handing callers an out-of-bounds offset.
 * A charnum <= 0 yields 0.
 */
int u8_char2byte(const char *s, int charnum)
{
	int n;
	int bytenum = 0;

	while (charnum > 0 && *s != '\0') {
		n = u8_nextlen(s);
		s += n;
		bytenum += n;
		charnum--;
	}
	return bytenum;
}
|
||||||
|
|
||||||
|
/*
 * Convert byte offset bytenum in s to a character number.
 *
 * Counts the characters whose first byte lies at an offset <= bytenum,
 * so offset 0 maps to character 1 (origin 1).  A negative bytenum gives
 * 0, and an offset beyond the terminating NUL gives -1.
 */
int u8_byte2char(const char *s, int bytenum)
{
	int nbytes = strlen(s);
	int pos = 0;
	int charnum = 0;	/* 0 matches start==0, which means no match */

	if (bytenum > nbytes)
		return -1;	/* ??? */

	while (pos <= bytenum) {
		pos += u8_nextlen(s + pos);
		charnum++;
	}
	return charnum;
}
|
||||||
|
|
||||||
|
/* runetochar() adapted from rune.c in the Plan 9 distribution */

enum
{
	Runeerror = 128, /* from somewhere else */
	Runemax = 0x10FFFF,

	Bit1 = 7,
	Bitx = 6,
	Bit2 = 5,
	Bit3 = 4,
	Bit4 = 3,
	Bit5 = 2,

	T1 = ((1<<(Bit1+1))-1) ^ 0xFF,	/* 0000 0000 */
	Tx = ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
	T2 = ((1<<(Bit2+1))-1) ^ 0xFF,	/* 1100 0000 */
	T3 = ((1<<(Bit3+1))-1) ^ 0xFF,	/* 1110 0000 */
	T4 = ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
	T5 = ((1<<(Bit5+1))-1) ^ 0xFF,	/* 1111 1000 */

	Rune1 = (1<<(Bit1+0*Bitx))-1,	/* 0000 0000 0000 0000 0111 1111 */
	Rune2 = (1<<(Bit2+1*Bitx))-1,	/* 0000 0000 0000 0111 1111 1111 */
	Rune3 = (1<<(Bit3+2*Bitx))-1,	/* 0000 0000 1111 1111 1111 1111 */
	Rune4 = (1<<(Bit4+3*Bitx))-1,	/* 0001 1111 1111 1111 1111 1111 */

	Maskx = (1<<Bitx)-1,		/* 0011 1111 */
	Testx = Maskx ^ 0xFF,		/* 1100 0000 */

};

/*
 * Encode rune c as utf-8 into str and return the number of bytes
 * written (1..4).  str must have room for 4 bytes; no terminating NUL
 * is stored.
 *
 * A value above Runemax cannot be encoded and is replaced by U+FFFD,
 * the Unicode replacement character (bytes EF BF BD).
 */
int runetochar(char *str, int c)
{
	/* one character sequence 00000-0007F => 00-7F */
	if (c <= Rune1) {
		str[0] = c;
		return 1;
	}

	/* two character sequence 00080-007FF => T2 Tx */
	if (c <= Rune2) {
		str[0] = T2 | (c >> 1*Bitx);
		str[1] = Tx | (c & Maskx);
		return 2;
	}

	/*
	 * Out of range: substitute the replacement character.  The test
	 * sits here because U+FFFD needs the three-byte form below.
	 * (Substituting Runeerror, i.e. 128, at this point would emit
	 * E0 82 80, an overlong and therefore invalid encoding.)
	 */
	if (c > Runemax)
		c = 0xFFFD;

	/* three character sequence 00800-0FFFF => T3 Tx Tx */
	if (c <= Rune3) {
		str[0] = T3 | (c >> 2*Bitx);
		str[1] = Tx | ((c >> 1*Bitx) & Maskx);
		str[2] = Tx | (c & Maskx);
		return 3;
	}

	/* four character sequence 010000-1FFFFF => T4 Tx Tx Tx */
	str[0] = T4 | (c >> 3*Bitx);
	str[1] = Tx | ((c >> 2*Bitx) & Maskx);
	str[2] = Tx | ((c >> 1*Bitx) & Maskx);
	str[3] = Tx | (c & Maskx);
	return 4;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* ========== end of utf8 code =========== */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Cell *matchop(Node **a, int n) /* ~ and match() */
|
Cell *matchop(Node **a, int n) /* ~ and match() */
|
||||||
{
|
{
|
||||||
Cell *x, *y;
|
Cell *x, *y;
|
||||||
char *s, *t;
|
char *s, *t;
|
||||||
int i;
|
int i;
|
||||||
|
int cstart, cpatlen, len;
|
||||||
fa *pfa;
|
fa *pfa;
|
||||||
int (*mf)(fa *, const char *) = match, mode = 0;
|
int (*mf)(fa *, const char *) = match, mode = 0;
|
||||||
|
|
||||||
|
@ -605,9 +820,21 @@ Cell *matchop(Node **a, int n) /* ~ and match() */
|
||||||
}
|
}
|
||||||
tempfree(x);
|
tempfree(x);
|
||||||
if (n == MATCHFCN) {
|
if (n == MATCHFCN) {
|
||||||
int start = patbeg - s + 1;
|
int start = patbeg - s + 1; /* origin 1 */
|
||||||
if (patlen < 0)
|
if (patlen < 0) {
|
||||||
start = 0;
|
start = 0; /* not found */
|
||||||
|
} else {
|
||||||
|
cstart = u8_byte2char(s, start-1);
|
||||||
|
cpatlen = 0;
|
||||||
|
for (i = 0; i < patlen; i += len) {
|
||||||
|
len = u8_nextlen(patbeg+i);
|
||||||
|
cpatlen++;
|
||||||
|
}
|
||||||
|
|
||||||
|
start = cstart;
|
||||||
|
patlen = cpatlen;
|
||||||
|
}
|
||||||
|
|
||||||
setfval(rstartloc, (Awkfloat) start);
|
setfval(rstartloc, (Awkfloat) start);
|
||||||
setfval(rlengthloc, (Awkfloat) patlen);
|
setfval(rlengthloc, (Awkfloat) patlen);
|
||||||
x = gettemp();
|
x = gettemp();
|
||||||
|
@ -658,10 +885,15 @@ Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */
|
||||||
int i;
|
int i;
|
||||||
Cell *x, *y;
|
Cell *x, *y;
|
||||||
Awkfloat j;
|
Awkfloat j;
|
||||||
|
bool x_is_nan, y_is_nan;
|
||||||
|
|
||||||
x = execute(a[0]);
|
x = execute(a[0]);
|
||||||
y = execute(a[1]);
|
y = execute(a[1]);
|
||||||
|
x_is_nan = isnan(x->fval);
|
||||||
|
y_is_nan = isnan(y->fval);
|
||||||
if (x->tval&NUM && y->tval&NUM) {
|
if (x->tval&NUM && y->tval&NUM) {
|
||||||
|
if ((x_is_nan || y_is_nan) && n != NE)
|
||||||
|
return(False);
|
||||||
j = x->fval - y->fval;
|
j = x->fval - y->fval;
|
||||||
i = j<0? -1: (j>0? 1: 0);
|
i = j<0? -1: (j>0? 1: 0);
|
||||||
} else {
|
} else {
|
||||||
|
@ -674,7 +906,8 @@ Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */
|
||||||
else return(False);
|
else return(False);
|
||||||
case LE: if (i<=0) return(True);
|
case LE: if (i<=0) return(True);
|
||||||
else return(False);
|
else return(False);
|
||||||
case NE: if (i!=0) return(True);
|
case NE: if (x_is_nan && y_is_nan) return(True);
|
||||||
|
else if (i!=0) return(True);
|
||||||
else return(False);
|
else return(False);
|
||||||
case EQ: if (i == 0) return(True);
|
case EQ: if (i == 0) return(True);
|
||||||
else return(False);
|
else return(False);
|
||||||
|
@ -743,6 +976,7 @@ Cell *indirect(Node **a, int n) /* $( a[0] ) */
|
||||||
Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
|
Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
|
||||||
{
|
{
|
||||||
int k, m, n;
|
int k, m, n;
|
||||||
|
int mb, nb;
|
||||||
char *s;
|
char *s;
|
||||||
int temp;
|
int temp;
|
||||||
Cell *x, *y, *z = NULL;
|
Cell *x, *y, *z = NULL;
|
||||||
|
@ -778,12 +1012,16 @@ Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */
|
||||||
n = 0;
|
n = 0;
|
||||||
else if (n > k - m)
|
else if (n > k - m)
|
||||||
n = k - m;
|
n = k - m;
|
||||||
|
/* m is start, n is length from there */
|
||||||
DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s);
|
DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s);
|
||||||
y = gettemp();
|
y = gettemp();
|
||||||
temp = s[n+m-1]; /* with thanks to John Linderman */
|
mb = u8_char2byte(s, m-1); /* byte offset of start char in s */
|
||||||
s[n+m-1] = '\0';
|
nb = u8_char2byte(s, m-1+n); /* byte offset of end+1 char in s */
|
||||||
setsval(y, s + m - 1);
|
|
||||||
s[n+m-1] = temp;
|
temp = s[nb]; /* with thanks to John Linderman */
|
||||||
|
s[nb] = '\0';
|
||||||
|
setsval(y, s + mb);
|
||||||
|
s[nb] = temp;
|
||||||
tempfree(x);
|
tempfree(x);
|
||||||
return(y);
|
return(y);
|
||||||
}
|
}
|
||||||
|
@ -804,7 +1042,15 @@ Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */
|
||||||
for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
|
for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
|
||||||
continue;
|
continue;
|
||||||
if (*p2 == '\0') {
|
if (*p2 == '\0') {
|
||||||
v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */
|
/* v = (Awkfloat) (p1 - s1 + 1); origin 1 */
|
||||||
|
|
||||||
|
/* should be a function: used in match() as well */
|
||||||
|
int i, len;
|
||||||
|
v = 0;
|
||||||
|
for (i = 0; i < p1-s1+1; i += len) {
|
||||||
|
len = u8_nextlen(s1+i);
|
||||||
|
v++;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -814,6 +1060,18 @@ Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */
|
||||||
return(z);
|
return(z);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Return 1 if s contains at least one multi-byte (2 bytes or more)
 * utf-8 character as recognized by u8_nextlen(), 0 otherwise.
 */
int has_utf8(char *s)
{
	while (*s != 0) {
		int seqlen = u8_nextlen(s);

		if (seqlen > 1)
			return 1;
		s += seqlen;
	}
	return 0;
}
|
||||||
|
|
||||||
#define MAXNUMSIZE 50
|
#define MAXNUMSIZE 50
|
||||||
|
|
||||||
int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */
|
int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */
|
||||||
|
@ -856,7 +1114,6 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
|
||||||
s += 2;
|
s += 2;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
/* have to be real careful in case this is a huge number, eg, %100000d */
|
|
||||||
fmtwd = atoi(s+1);
|
fmtwd = atoi(s+1);
|
||||||
if (fmtwd < 0)
|
if (fmtwd < 0)
|
||||||
fmtwd = -fmtwd;
|
fmtwd = -fmtwd;
|
||||||
|
@ -929,7 +1186,8 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
|
||||||
n = fmtwd;
|
n = fmtwd;
|
||||||
adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
|
adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
|
||||||
switch (flag) {
|
switch (flag) {
|
||||||
case '?': snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */
|
case '?':
|
||||||
|
snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */
|
||||||
t = getsval(x);
|
t = getsval(x);
|
||||||
n = strlen(t);
|
n = strlen(t);
|
||||||
if (fmtwd > n)
|
if (fmtwd > n)
|
||||||
|
@ -943,29 +1201,176 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co
|
||||||
case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
|
case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
|
||||||
case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
|
case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
|
||||||
case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
|
case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
|
||||||
case 's':
|
|
||||||
|
case 's': {
|
||||||
t = getsval(x);
|
t = getsval(x);
|
||||||
n = strlen(t);
|
n = strlen(t);
|
||||||
if (fmtwd > n)
|
/* if simple format or no utf-8 in the string, sprintf works */
|
||||||
n = fmtwd;
|
if (!has_utf8(t) || strcmp(fmt,"%s") == 0) {
|
||||||
if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
|
if (fmtwd > n)
|
||||||
FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t);
|
n = fmtwd;
|
||||||
snprintf(p, BUFSZ(p), fmt, t);
|
if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
|
||||||
|
FATAL("huge string/format (%d chars) in printf %.30s..." \
|
||||||
|
" ran format() out of memory", n, t);
|
||||||
|
snprintf(p, BUFSZ(p), fmt, t);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* get here if string has utf-8 chars and fmt is not plain %s */
|
||||||
|
/* "%-w.ps", where -, w and .p are all optional */
|
||||||
|
/* '0' before the w is a flag character */
|
||||||
|
/* fmt points at % */
|
||||||
|
int ljust = 0, wid = 0, prec = n, pad = 0;
|
||||||
|
char *f = fmt+1;
|
||||||
|
if (f[0] == '-') {
|
||||||
|
ljust = 1;
|
||||||
|
f++;
|
||||||
|
}
|
||||||
|
// flags '0' and '+' are recognized but skipped
|
||||||
|
if (f[0] == '0') {
|
||||||
|
f++;
|
||||||
|
if (f[0] == '+')
|
||||||
|
f++;
|
||||||
|
}
|
||||||
|
if (f[0] == '+') {
|
||||||
|
f++;
|
||||||
|
if (f[0] == '0')
|
||||||
|
f++;
|
||||||
|
}
|
||||||
|
if (isdigit((uschar)f[0])) { /* there is a wid */
|
||||||
|
wid = strtol(f, &f, 10);
|
||||||
|
}
|
||||||
|
if (f[0] == '.') { /* there is a .prec */
|
||||||
|
prec = strtol(++f, &f, 10);
|
||||||
|
}
|
||||||
|
if (prec > u8_strlen(t))
|
||||||
|
prec = u8_strlen(t);
|
||||||
|
pad = wid>prec ? wid - prec : 0; // has to be >= 0
|
||||||
|
int i, k, n;
|
||||||
|
|
||||||
|
if (ljust) { // print prec chars from t, then pad blanks
|
||||||
|
n = u8_char2byte(t, prec);
|
||||||
|
for (k = 0; k < n; k++) {
|
||||||
|
//putchar(t[k]);
|
||||||
|
*p++ = t[k];
|
||||||
|
}
|
||||||
|
for (i = 0; i < pad; i++) {
|
||||||
|
//printf(" ");
|
||||||
|
*p++ = ' ';
|
||||||
|
}
|
||||||
|
} else { // print pad blanks, then prec chars from t
|
||||||
|
for (i = 0; i < pad; i++) {
|
||||||
|
//printf(" ");
|
||||||
|
*p++ = ' ';
|
||||||
|
}
|
||||||
|
n = u8_char2byte(t, prec);
|
||||||
|
for (k = 0; k < n; k++) {
|
||||||
|
//putchar(t[k]);
|
||||||
|
*p++ = t[k];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*p = 0;
|
||||||
break;
|
break;
|
||||||
case 'c':
|
}
|
||||||
|
|
||||||
|
case 'c': {
|
||||||
|
/*
|
||||||
|
* If a numeric value is given, awk should just turn
|
||||||
|
* it into a character and print it:
|
||||||
|
* BEGIN { printf("%c\n", 65) }
|
||||||
|
* prints "A".
|
||||||
|
*
|
||||||
|
* But what if the numeric value is > 128 and
|
||||||
|
* represents a valid Unicode code point?!? We do
|
||||||
|
* our best to convert it back into UTF-8. If we
|
||||||
|
* can't, we output the encoding of the Unicode
|
||||||
|
* "invalid character", 0xFFFD.
|
||||||
|
*/
|
||||||
if (isnum(x)) {
|
if (isnum(x)) {
|
||||||
if ((int)getfval(x))
|
int charval = (int) getfval(x);
|
||||||
snprintf(p, BUFSZ(p), fmt, (int) getfval(x));
|
|
||||||
else {
|
if (charval != 0) {
|
||||||
|
if (charval < 128)
|
||||||
|
snprintf(p, BUFSZ(p), fmt, charval);
|
||||||
|
else {
|
||||||
|
// possible unicode character
|
||||||
|
size_t count;
|
||||||
|
char *bs = wide_char_to_byte_str(charval, &count);
|
||||||
|
|
||||||
|
if (bs == NULL) { // invalid character
|
||||||
|
// use unicode invalid character, 0xFFFD
|
||||||
|
bs = "\357\277\275";
|
||||||
|
count = 3;
|
||||||
|
}
|
||||||
|
t = bs;
|
||||||
|
n = count;
|
||||||
|
goto format_percent_c;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
*p++ = '\0'; /* explicit null byte */
|
*p++ = '\0'; /* explicit null byte */
|
||||||
*p = '\0'; /* next output will start here */
|
*p = '\0'; /* next output will start here */
|
||||||
}
|
}
|
||||||
} else
|
break;
|
||||||
|
}
|
||||||
|
t = getsval(x);
|
||||||
|
n = u8_nextlen(t);
|
||||||
|
format_percent_c:
|
||||||
|
if (n < 2) { /* not utf8 */
|
||||||
snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
|
snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// utf8 character, almost same song and dance as for %s
|
||||||
|
int ljust = 0, wid = 0, prec = n, pad = 0;
|
||||||
|
char *f = fmt+1;
|
||||||
|
if (f[0] == '-') {
|
||||||
|
ljust = 1;
|
||||||
|
f++;
|
||||||
|
}
|
||||||
|
// flags '0' and '+' are recognized but skipped
|
||||||
|
if (f[0] == '0') {
|
||||||
|
f++;
|
||||||
|
if (f[0] == '+')
|
||||||
|
f++;
|
||||||
|
}
|
||||||
|
if (f[0] == '+') {
|
||||||
|
f++;
|
||||||
|
if (f[0] == '0')
|
||||||
|
f++;
|
||||||
|
}
|
||||||
|
if (isdigit((uschar)f[0])) { /* there is a wid */
|
||||||
|
wid = strtol(f, &f, 10);
|
||||||
|
}
|
||||||
|
if (f[0] == '.') { /* there is a .prec */
|
||||||
|
prec = strtol(++f, &f, 10);
|
||||||
|
}
|
||||||
|
if (prec > 1) // %c --> only one character
|
||||||
|
prec = 1;
|
||||||
|
pad = wid>prec ? wid - prec : 0; // has to be >= 0
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (ljust) { // print one char from t, then pad blanks
|
||||||
|
for (int i = 0; i < n; i++)
|
||||||
|
*p++ = t[i];
|
||||||
|
for (i = 0; i < pad; i++) {
|
||||||
|
//printf(" ");
|
||||||
|
*p++ = ' ';
|
||||||
|
}
|
||||||
|
} else { // print pad blanks, then prec chars from t
|
||||||
|
for (i = 0; i < pad; i++) {
|
||||||
|
//printf(" ");
|
||||||
|
*p++ = ' ';
|
||||||
|
}
|
||||||
|
for (int i = 0; i < n; i++)
|
||||||
|
*p++ = t[i];
|
||||||
|
}
|
||||||
|
*p = 0;
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
FATAL("can't happen: bad conversion %c in format()", flag);
|
FATAL("can't happen: bad conversion %c in format()", flag);
|
||||||
}
|
}
|
||||||
|
|
||||||
tempfree(x);
|
tempfree(x);
|
||||||
p += strlen(p);
|
p += strlen(p);
|
||||||
s++;
|
s++;
|
||||||
|
@ -1265,7 +1670,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
|
||||||
char *origfs = NULL;
|
char *origfs = NULL;
|
||||||
int sep;
|
int sep;
|
||||||
char temp, num[50];
|
char temp, num[50];
|
||||||
int n, tempstat, arg3type;
|
int j, n, tempstat, arg3type;
|
||||||
double result;
|
double result;
|
||||||
|
|
||||||
y = execute(a[0]); /* source string */
|
y = execute(a[0]); /* source string */
|
||||||
|
@ -1274,20 +1679,22 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
|
||||||
FATAL("out of space in split");
|
FATAL("out of space in split");
|
||||||
tempfree(y);
|
tempfree(y);
|
||||||
arg3type = ptoi(a[3]);
|
arg3type = ptoi(a[3]);
|
||||||
if (a[2] == NULL) /* fs string */
|
if (a[2] == NULL) { /* BUG: CSV should override implicit fs but not explicit */
|
||||||
fs = getsval(fsloc);
|
fs = getsval(fsloc);
|
||||||
else if (arg3type == STRING) { /* split(str,arr,"string") */
|
} else if (arg3type == STRING) { /* split(str,arr,"string") */
|
||||||
x = execute(a[2]);
|
x = execute(a[2]);
|
||||||
fs = origfs = strdup(getsval(x));
|
fs = origfs = strdup(getsval(x));
|
||||||
if (fs == NULL)
|
if (fs == NULL)
|
||||||
FATAL("out of space in split");
|
FATAL("out of space in split");
|
||||||
tempfree(x);
|
tempfree(x);
|
||||||
} else if (arg3type == REGEXPR)
|
} else if (arg3type == REGEXPR) {
|
||||||
fs = "(regexpr)"; /* split(str,arr,/regexpr/) */
|
fs = "(regexpr)"; /* split(str,arr,/regexpr/) */
|
||||||
else
|
} else {
|
||||||
FATAL("illegal type of split");
|
FATAL("illegal type of split");
|
||||||
|
}
|
||||||
sep = *fs;
|
sep = *fs;
|
||||||
ap = execute(a[1]); /* array name */
|
ap = execute(a[1]); /* array name */
|
||||||
|
/* BUG 7/26/22: this appears not to reset array: see C1/asplit */
|
||||||
freesymtab(ap);
|
freesymtab(ap);
|
||||||
DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
|
DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
|
||||||
ap->tval &= ~STR;
|
ap->tval &= ~STR;
|
||||||
|
@ -1341,7 +1748,41 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
|
||||||
setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
|
setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
|
||||||
spdone:
|
spdone:
|
||||||
pfa = NULL;
|
pfa = NULL;
|
||||||
} else if (sep == ' ') {
|
|
||||||
|
} else if (a[2] == NULL && CSV) { /* CSV only if no explicit separator */
|
||||||
|
char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */
|
||||||
|
for (;;) {
|
||||||
|
char *fr = newt;
|
||||||
|
n++;
|
||||||
|
if (*s == '"' ) { /* start of "..." */
|
||||||
|
for (s++ ; *s != '\0'; ) {
|
||||||
|
if (*s == '"' && s[1] != '\0' && s[1] == '"') {
|
||||||
|
s += 2; /* doubled quote */
|
||||||
|
*fr++ = '"';
|
||||||
|
} else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) {
|
||||||
|
s++; /* skip over closing quote */
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
*fr++ = *s++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*fr++ = 0;
|
||||||
|
} else { /* unquoted field */
|
||||||
|
while (*s != ',' && *s != '\0')
|
||||||
|
*fr++ = *s++;
|
||||||
|
*fr++ = 0;
|
||||||
|
}
|
||||||
|
snprintf(num, sizeof(num), "%d", n);
|
||||||
|
if (is_number(newt, &result))
|
||||||
|
setsymtab(num, newt, result, STR|NUM, (Array *) ap->sval);
|
||||||
|
else
|
||||||
|
setsymtab(num, newt, 0.0, STR, (Array *) ap->sval);
|
||||||
|
if (*s++ == '\0')
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
free(newt);
|
||||||
|
|
||||||
|
} else if (!CSV && sep == ' ') { /* usual case: split on white space */
|
||||||
for (n = 0; ; ) {
|
for (n = 0; ; ) {
|
||||||
#define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
|
#define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
|
||||||
while (ISWS(*s))
|
while (ISWS(*s))
|
||||||
|
@ -1364,19 +1805,25 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */
|
||||||
if (*s != '\0')
|
if (*s != '\0')
|
||||||
s++;
|
s++;
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */
|
} else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */
|
||||||
for (n = 0; *s != '\0'; s++) {
|
for (n = 0; *s != '\0'; s += u8_nextlen(s)) {
|
||||||
char buf[2];
|
char buf[10];
|
||||||
n++;
|
n++;
|
||||||
snprintf(num, sizeof(num), "%d", n);
|
snprintf(num, sizeof(num), "%d", n);
|
||||||
buf[0] = *s;
|
|
||||||
buf[1] = '\0';
|
for (j = 0; j < u8_nextlen(s); j++) {
|
||||||
|
buf[j] = s[j];
|
||||||
|
}
|
||||||
|
buf[j] = '\0';
|
||||||
|
|
||||||
if (isdigit((uschar)buf[0]))
|
if (isdigit((uschar)buf[0]))
|
||||||
setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
|
setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
|
||||||
else
|
else
|
||||||
setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
|
setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
|
||||||
}
|
}
|
||||||
} else if (*s != '\0') {
|
|
||||||
|
} else if (*s != '\0') { /* some random single character */
|
||||||
for (;;) {
|
for (;;) {
|
||||||
n++;
|
n++;
|
||||||
t = s;
|
t = s;
|
||||||
|
@ -1535,6 +1982,7 @@ static char *nawk_convert(const char *s, int (*fun_c)(int),
|
||||||
size_t n = 0;
|
size_t n = 0;
|
||||||
wchar_t wc;
|
wchar_t wc;
|
||||||
size_t sz = MB_CUR_MAX;
|
size_t sz = MB_CUR_MAX;
|
||||||
|
int unused;
|
||||||
|
|
||||||
if (sz == 1) {
|
if (sz == 1) {
|
||||||
buf = tostring(s);
|
buf = tostring(s);
|
||||||
|
@ -1554,7 +2002,7 @@ static char *nawk_convert(const char *s, int (*fun_c)(int),
|
||||||
* doesn't work.)
|
* doesn't work.)
|
||||||
* Increment said variable to avoid a different warning.
|
* Increment said variable to avoid a different warning.
|
||||||
*/
|
*/
|
||||||
int unused = wctomb(NULL, L'\0');
|
unused = wctomb(NULL, L'\0');
|
||||||
unused++;
|
unused++;
|
||||||
|
|
||||||
ps = s;
|
ps = s;
|
||||||
|
@ -1629,7 +2077,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
|
||||||
if (isarr(x))
|
if (isarr(x))
|
||||||
u = ((Array *) x->sval)->nelem; /* GROT. should be function*/
|
u = ((Array *) x->sval)->nelem; /* GROT. should be function*/
|
||||||
else
|
else
|
||||||
u = strlen(getsval(x));
|
u = u8_strlen(getsval(x));
|
||||||
break;
|
break;
|
||||||
case FLOG:
|
case FLOG:
|
||||||
errno = 0;
|
errno = 0;
|
||||||
|
@ -2402,3 +2850,41 @@ void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */
|
||||||
*pb_ptr = pb;
|
*pb_ptr = pb;
|
||||||
*sptr_ptr = sptr;
|
*sptr_ptr = sptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Encode a Unicode code point as a NUL-terminated UTF-8 byte string.
 *
 * rune:   code point to encode.
 * outlen: if not NULL, receives the number of encoded bytes, not counting
 *         the terminating NUL.  Left untouched when NULL is returned.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call, or NULL when rune is not a Unicode scalar value: negative, above
 * 0x10FFFF, or a UTF-16 surrogate (0xD800-0xDFFF), which UTF-8 is not
 * permitted to encode.
 */
static char *wide_char_to_byte_str(int rune, size_t *outlen)
{
	static char buf[5];	/* at most 4 encoded bytes plus the NUL */
	int len = 0;

	if (rune < 0 || rune > 0x10FFFF)
		return NULL;

	/* surrogates would yield an ill-formed 3-byte sequence */
	if (rune >= 0xD800 && rune <= 0xDFFF)
		return NULL;

	/* clear leftovers from a previous, possibly longer, encoding */
	memset(buf, 0, sizeof(buf));

	if (rune <= 0x0000007F) {
		// 0xxxxxxx
		buf[len++] = (char) rune;
	} else if (rune <= 0x000007FF) {
		// 110xxxxx 10xxxxxx
		buf[len++] = (char) (0xC0 | (rune >> 6));
		buf[len++] = (char) (0x80 | (rune & 0x3F));
	} else if (rune <= 0x0000FFFF) {
		// 1110xxxx 10xxxxxx 10xxxxxx
		buf[len++] = (char) (0xE0 | (rune >> 12));
		buf[len++] = (char) (0x80 | ((rune >> 6) & 0x3F));
		buf[len++] = (char) (0x80 | (rune & 0x3F));
	} else {
		// 0x00010000 - 0x10FFFF
		// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
		buf[len++] = (char) (0xF0 | (rune >> 18));
		buf[len++] = (char) (0x80 | ((rune >> 12) & 0x3F));
		buf[len++] = (char) (0x80 | ((rune >> 6) & 0x3F));
		buf[len++] = (char) (0x80 | (rune & 0x3F));
	}

	if (outlen != NULL)
		*outlen = (size_t) len;
	buf[len] = '\0';

	return buf;
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* $OpenBSD: tran.c,v 1.36 2022/09/21 01:42:59 millert Exp $ */
|
/* $OpenBSD: tran.c,v 1.37 2023/09/17 14:49:44 millert Exp $ */
|
||||||
/****************************************************************
|
/****************************************************************
|
||||||
Copyright (C) Lucent Technologies 1997
|
Copyright (C) Lucent Technologies 1997
|
||||||
All Rights Reserved
|
All Rights Reserved
|
||||||
|
@ -309,7 +309,7 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
|
||||||
} else if (&vp->fval == NF) {
|
} else if (&vp->fval == NF) {
|
||||||
donerec = false; /* mark $0 invalid */
|
donerec = false; /* mark $0 invalid */
|
||||||
setlastfld(f);
|
setlastfld(f);
|
||||||
DPRINTF("setting NF to %g\n", f);
|
DPRINTF("setfval: setting NF to %g\n", f);
|
||||||
} else if (isrec(vp)) {
|
} else if (isrec(vp)) {
|
||||||
donefld = false; /* mark $1... invalid */
|
donefld = false; /* mark $1... invalid */
|
||||||
donerec = true;
|
donerec = true;
|
||||||
|
@ -349,6 +349,10 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */
|
||||||
(void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld);
|
(void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld);
|
||||||
if ((vp->tval & (NUM | STR)) == 0)
|
if ((vp->tval & (NUM | STR)) == 0)
|
||||||
funnyvar(vp, "assign to");
|
funnyvar(vp, "assign to");
|
||||||
|
if (CSV && (vp == rsloc))
|
||||||
|
WARNING("danger: don't set RS when --csv is in effect");
|
||||||
|
if (CSV && (vp == fsloc))
|
||||||
|
WARNING("danger: don't set FS when --csv is in effect");
|
||||||
if (isfld(vp)) {
|
if (isfld(vp)) {
|
||||||
donerec = false; /* mark $0 invalid */
|
donerec = false; /* mark $0 invalid */
|
||||||
fldno = atoi(vp->nval);
|
fldno = atoi(vp->nval);
|
||||||
|
@ -376,7 +380,7 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */
|
||||||
donerec = false; /* mark $0 invalid */
|
donerec = false; /* mark $0 invalid */
|
||||||
f = getfval(vp);
|
f = getfval(vp);
|
||||||
setlastfld(f);
|
setlastfld(f);
|
||||||
DPRINTF("setting NF to %g\n", f);
|
DPRINTF("setsval: setting NF to %g\n", f);
|
||||||
}
|
}
|
||||||
|
|
||||||
return(vp->sval);
|
return(vp->sval);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.\" $OpenBSD: tmux.1,v 1.932 2023/09/15 06:31:49 nicm Exp $
|
.\" $OpenBSD: tmux.1,v 1.933 2023/09/16 16:18:29 nicm Exp $
|
||||||
.\"
|
.\"
|
||||||
.\" Copyright (c) 2007 Nicholas Marriott <nicholas.marriott@gmail.com>
|
.\" Copyright (c) 2007 Nicholas Marriott <nicholas.marriott@gmail.com>
|
||||||
.\"
|
.\"
|
||||||
|
@ -14,7 +14,7 @@
|
||||||
.\" IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
|
.\" IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
|
||||||
.\" OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
.\" OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||||
.\"
|
.\"
|
||||||
.Dd $Mdocdate: September 15 2023 $
|
.Dd $Mdocdate: September 16 2023 $
|
||||||
.Dt TMUX 1
|
.Dt TMUX 1
|
||||||
.Os
|
.Os
|
||||||
.Sh NAME
|
.Sh NAME
|
||||||
|
@ -2004,18 +2004,6 @@ Move the cursor to the start of the line.
|
||||||
(emacs: M-R)
|
(emacs: M-R)
|
||||||
.Xc
|
.Xc
|
||||||
Move to the top line.
|
Move to the top line.
|
||||||
.It Xo
|
|
||||||
.Ic next-prompt
|
|
||||||
(vi: C-n)
|
|
||||||
(emacs: C-n)
|
|
||||||
.Xc
|
|
||||||
Move to the next prompt.
|
|
||||||
.It Xo
|
|
||||||
.Ic previous-prompt
|
|
||||||
(vi: C-p)
|
|
||||||
(emacs: C-p)
|
|
||||||
.Xc
|
|
||||||
Move to the previous prompt.
|
|
||||||
.El
|
.El
|
||||||
.Pp
|
.Pp
|
||||||
The search commands come in several varieties:
|
The search commands come in several varieties:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue