TclAwk - A Tcl version of Awk

awk 是个文本处理程序。它每次读取一行,并用空格把改行分割为多个字段。

用户通常通过模式匹配来分析文本,和从中提取内容。

比如,可以利用 awk 实现一个简单的记账本程序。

2022-05-13 lunch  25.00
2022-05-13 coffee 13.00
2022-05-13 bus     2.00
2022-05-12 lunch  18.00

那么,我们可以计算吃午饭总共花了多少钱。

/lunch/ {
  sum += $3
}

END {
  print "sum of lunch =", sum;
}

TclAwk

用 Tcl 来模拟 Awk,可以有至少两方面的用处

  1. 描述 Awk 程序的流程逻辑
  2. 便于用 Tcl 语言解析文本

TclAwk 的主逻辑

proc tclawk {} {
  global 0 line

  chan close stdin

  run-BEGIN
  foreach FILENAME $::argv {
    run-BEGINFILE
    open $FILENAME
    while {[chan gets stdin 0]>=0} {
      set line $0
      run-RULESET
    }
    chan close stdin
    run-ENDFILE
  }
  run-END
  exit
}

具体的代码实现呢,参加如下。

TclAwk 代码

#!/usr/bin/env tclsh
# vim:set syntax=tcl: #
#==================================================================
# A Tcl version of Awk
#==================================================================

global *BEGIN* *END*
global *BEGINFILE* *ENDFILE*
global FILENAME
global NF FNR
global 0

set *BEGIN*   [list]
set *END*     [list]
set *BEGINFILE* [list]
set *ENDFILE* [list]
set *RULESET* [list]

proc BEGIN {body} {
  global *BEGIN*
  lappend *BEGIN* $body
}

proc END {body} {
  global *END*
  lappend *END* $body
}

proc BEGINFILE {body} {
  global *BEGINFILE*
  lappend *BEGINFILE* $body
}

proc ENDFILE {body} {
  global *ENDFILE*
  lappend *ENDFILE* $body
}

proc MATCH {pattern body} {
  global *RULESET*

  set matchExpr "\[string match [list $pattern] \$::0\]"
  set code [subst {
    if {$matchExpr} {
      $body
    }
  }]

  lappend *RULESET* $code
}

proc print {args} {
  if {[llength $args]==0} {
    puts $::0
  } else {
    puts [join $args]
  }
}

proc tclawk {args} {
  trace remove execution exit enter ::tclawk

  global FILENAME
  global 0

  chan close stdin

  run-BEGIN
  foreach FILENAME $::argv {
    run-BEGINFILE
    open $FILENAME
    set FNR 0
    while {[chan gets stdin 0]>=0} {
      set ::line $0
      incr NR
      incr FNR
      run-RULESET
    }
    chan close stdin
    run-ENDFILE
  }
  run-END
  exit
}

proc run-BEGIN {} {
  foreach code [set ::*BEGIN*] {
    uplevel #0 $code
  }
}

proc run-END {} {
  foreach code [set ::*END*] {
    uplevel #0 $code
  }
}

proc run-BEGINFILE {} {
  foreach code [set ::*BEGINFILE*] {
    uplevel #0 $code
  }
}

proc run-ENDFILE {} {
  foreach code [set ::*ENDFILE*] {
    uplevel #0 $code
  }
}

proc run-RULESET {} {
  foreach code [set ::*RULESET*] {
    uplevel #0 $code
  }
}


trace add execution exit enter ::tclawk

#---------------------------------------------------------#

return

## Usage Example

BEGINFILE {
  print "start", FILENAME;
}


MATCH "hello *" {
  print $0
}


END {
  print "DONE"
  print "NR = $NR"
}