TAPL 코드 설명(Chapter 4)

TAPL(Types and Programming Languages)을 읽다 보면 일부 장에 OCaml 샘플 코드가 실려 있다.
책에는 구현의 핵심 부분은 실려 있지만 렉서, 파서 등은 실려 있지 않다. 대신 책의 홈페이지에서 Implementations로 이동하면 소스 코드가 있으며, 거기에는 완전한 코드가 실려 있다.
영어 주석도 있지만, 전체적인 개요를 파악할 수 있도록 먼저 이 글을 쓴다.
첫 번째인 이번 글에서는 Chapter 4의 코드를 다룬다.
코드는 홈페이지에서 가져온 것에 일본어 주석을 달아 GitHub에 올려 두었으며, 이 글에는 거기에서 발췌한 코드를 실었다.
또한 빌드에는 원본의 Makefile보다 (주관적으로) 간단한 Dune을 사용해 보았다.

dune

아래와 같이 dune 파일을 작성하면 Makefile 없이 빌드할 수 있다.
언뜻 보기에 dune에서 사용하는 문법은 낯설 수 있지만, 구문 트리를 그대로 쓴 것일 뿐이다. target은 생성되는 파일, deps는 의존 파일이고, action에는 실제로 실행할 명령을 기술한다. env 부분에는 빌드 시의 Warning을 억제하는 플래그가 쓰여 있다.
dune

; Generate lexer.ml from lexer.mll by running ocamllex (quiet mode, explicit
; output file) from the workspace root.
(rule
 (target lexer.ml)
 (deps lexer.mll)
 (action
  (chdir
   %{workspace_root}
   (run %{bin:ocamllex} -q -o %{target} %{deps}))))

; Generate parser.ml and parser.mli from parser.mly by running ocamlyacc
; from the workspace root.
(rule
 (targets parser.ml parser.mli)
 (deps parser.mly)
 (action
  (chdir
   %{workspace_root}
   (run %{bin:ocamlyacc} %{deps}))))

; In the dev profile, compile with the standard flags plus "-w -a", which
; turns off every compiler warning.
(env
 (dev
  (flags
   (:standard -w -a))))

; Build one executable whose entry module is main.ml.
(executable
 (name main))

또한 빌드 시에는 dune-project 파일도 사용된다.
dune-project

(lang dune 2.9)
; The line above selects version 2.9 of the dune language for this project;
; it has to remain the first line of the file.

전체 이미지

문서별 설명

이 프로젝트에는 dune, dune-project, .ocamlformat, 테스트용 test.f 외에 .ml, .mli, .mll, .mly 4가지 확장자의 파일이 있다. .ml은 구현 파일로 정의를 쓴다. .mli는 인터페이스 파일로 사양을 기술한다. .mll은 ocamllex라는 도구로 자구(어휘) 분석기를 만드는 데 쓰인다. .mly는 ocamlyacc라는 도구로 구문 분석기를 만드는 데 쓰인다.
그중 .mly만 평론이 있는데 작가가 이해해 주기를 바란다.그래서 이번에는 이 문건에 대한 해설에 집중할 것이다.
이 4종류 파일의 총 줄 수는 다음과 같이 적다.

❯ fd "(ml|mli|mll|mly)$" | xargs wc -l
      49 core.ml
       9 core.mli
     216 lexer.mll
     100 main.ml
     141 parser.mly
      73 support.ml
      57 support.mli
     102 syntax.ml
      24 syntax.mli
     771 total

의존 관계의 순서에 따라 설명한다.

support.mli support.ml

여기에는 여러 파일에서 공통으로 사용되는 함수가 정의되어 있다.
먼저 서브모듈 Pervasive의 인터페이스를 정의한다. 여기에는 pr의 시그니처가 쓰여 있다. pr은 표준 출력에 사용되는 함수로, 짧은 이름으로 호출할 수 있게 해 준다.
support.mli

(* module Support

   Collects a number of low-level facilities used by the other modules
   in the typechecker/evaluator.
*)

(* ------------------------------------------------------------------------ *)
(* Some pervasive abbreviations -- opened everywhere by convention *)
(* Definitions of short-hand names *)

module Pervasive : sig
  (* [pr s] prints the string [s]; in support.ml it is bound to
     [Format.print_string]. *)
  val pr : string -> unit
end

다음 부분은 서브모듈 Error의 인터페이스를 정의한다.
먼저 파일 정보의 타입과 관련 함수의 사양이 쓰여 있다.
support.mli

(* ------------------------------------------------------------------------ *)
(* Error printing utilities -- opened everywhere by convention *)

module Error : sig
  (* An exception raised by the low-level error printer; exported
     here so that it can be caught in module Main and converted into
     an exit status for the whole program. *)
  (* The integer payload is the exit status to report. *)
  exception Exit of int

  (* An element of the type info represents a "file position": a
     file name, line number, and character position within the line.
     Used for printing error messages. *)
  (* Abstract type of file-position information. *)
  type info

  (* Placeholder value for when no real position is available. *)
  val dummyinfo : info

  (* Create file position info: filename lineno column *)
  (* Builds a file-position value from its three components. *)
  val createInfo : string -> int -> int -> info

  (* Prints a file position. *)
  val printInfo : info -> unit

다음 부분에는 렉서와 파서에서 사용하는, 파일 정보가 붙은 값의 타입과 오류·경고 출력 함수들의 사양이 적혀 있다.
support.mli

  (* A convenient datatype for a "value with file info."  Used in
     the lexer and parser. *)
  (* [i] is the file position and [v] is the value it is attached to. *)
  type 'a withinfo = { i : info; v : 'a }

  (* Print an error message and fail.  The printing function is called
     in a context where the formatter is processing an hvbox.  Insert
     calls to Format.print_space to print a space or, if necessary,
     break the line at that point. *)
  (* Prints the error message and terminates by raising [Exit]. *)
  val errf : (unit -> unit) -> 'a

  (* Same as [errf], but prints the given file position before the message. *)
  val errfAt : info -> (unit -> unit) -> 'a

  (* Convenient wrappers for the above, for the common case where the
     action to be performed is just to print a given string. *)
  (* String-message wrapper around [errf]. *)
  val err : string -> 'a

  (* String-message wrapper around [errfAt]. *)
  val error : info -> string -> 'a

  (* Variants that print a message but do not fail afterwards *)
  (* Unlike the functions above, a warning does not end the program. *)
  val warning : string -> unit

  (* Prints a warning preceded by the file position it refers to. *)
  val warningAt : info -> string -> unit
end

support.ml에 이들의 구현이 쓰여 있다.
support.ml

open Format

module Error = struct
  (* Carries the exit status; raised by [errf] once the message is printed. *)
  exception Exit of int

  (* A source position (file name, line, column), or [UNKNOWN] when no
     position is available. *)
  type info = FI of string * int * int | UNKNOWN

  (* A value [v] paired with the position [i] it was read from. *)
  type 'a withinfo = { i : info; v : 'a }

  let dummyinfo = UNKNOWN

  let createInfo file line column = FI (file, line, column)

  (* Flush pending output, run [print_message] inside a vertical box that
     wraps an hv box, finish the line, then raise [Exit 1].  Never returns
     normally. *)
  let errf print_message =
    print_flush ();
    open_vbox 0;
    open_hvbox 0;
    print_message ();
    print_cut ();
    close_box ();
    print_newline ();
    raise (Exit 1)

  (* In the text of the book, file positions in error messages are replaced
     with the string "Error:" *)
  let printInfo fi =
    match fi with
    | UNKNOWN -> print_string "<Unknown file and line>: "
    | FI (file, line, column) -> printf "%s:%d.%d:" file line column

  (* Like [errf], but the position [fi] and a break hint are printed before
     the caller-supplied message. *)
  let errfAt fi print_message =
    let located () =
      printInfo fi;
      print_space ();
      print_message ()
    in
    errf located

  (* Fail with a plain string message prefixed by "Error: ". *)
  let err msg =
    let report () =
      print_string "Error: ";
      print_string msg;
      print_newline ()
    in
    errf report

  (* Fail with a plain string message located at [fi]. *)
  let error fi msg =
    let report () =
      print_string msg;
      print_newline ()
    in
    errfAt fi report

  (* Print a warning and carry on. *)
  let warning msg =
    print_string "Warning: ";
    print_string msg;
    print_newline ()

  (* Print a warning located at [fi] and carry on. *)
  let warningAt fi msg =
    printInfo fi;
    print_string " Warning: ";
    print_string msg;
    print_newline ()
end

(* ---------------------------------------------------------------------- *)

module Pervasive = struct
  type info = Error.info

  (* [pr text] prints [text] with [Format.print_string]. *)
  let pr text = Format.print_string text
end
(* module pervasive *)

lexer.mll parser.mly

이것은 ocamllex와 ocamlyacc라는, 렉서와 파서 코드를 생성하는 도구에서 사용하는 파일이다. ocamllex의 기본 문법은 다음과 같다.

{ header }
let ident = regexp …
[refill { refill-handler }]
rule entrypoint [arg1… argn] =
  parse regexp { action }
      | …
      | regexp { action }
and entrypoint [arg1… argn] =
  parse …
and …
{ trailer }

처음과 마지막의 {}로 둘러싸인 부분이 header와 trailer이며, 샘플 코드에서는 header의 맨 앞에 예약어를 정의하고 있다. header와 entrypoint 사이에서는 자주 사용하는 정규 표현식을 ident로 정의할 수 있다. 이것을 쓰면 수고를 덜 수 있다. TAPL의 샘플 코드에서는 header에 보조 함수도 포함되어 있다. entrypoint에서는 실제로 자구를 해석할 때의 문법을 regexp에 정규 표현식으로 쓰고, { action }에 그때 실행할 처리를 쓴다. 정규 표현식은 최장 일치 규칙으로 매칭된다.
자세한 문법은 공식 문서를 참조하라.
lexer.mll

(* 
   The lexical analyzer: lexer.ml is generated automatically
   from lexer.mll.
   
   The only modification commonly needed here is adding new keywords to the 
   list of reserved words at the top.  
*)

{
open Support.Error

(* Reserved words: each entry pairs a lexeme with a function that builds the
   matching parser token from the position info of the lexeme. *)
let reservedWords = [
  (* Keywords *)
  (* Reserved keywords *)
  ("import", fun i -> Parser.IMPORT i);
  ("if", fun i -> Parser.IF i);
  ("then", fun i -> Parser.THEN i);
  ("else", fun i -> Parser.ELSE i);
  ("true", fun i -> Parser.TRUE i);
  ("false", fun i -> Parser.FALSE i);
  ("succ", fun i -> Parser.SUCC i);
  ("pred", fun i -> Parser.PRED i);
  ("iszero", fun i -> Parser.ISZERO i);
  
  (* Reserved symbols *)
  (* Symbols *)
  ("_", fun i -> Parser.USCORE i);
  ("'", fun i -> Parser.APOSTROPHE i);
  ("\"", fun i -> Parser.DQUOTE i);
  ("!", fun i -> Parser.BANG i);
  ("#", fun i -> Parser.HASH i);
  ("$", fun i -> Parser.TRIANGLE i);
  ("*", fun i -> Parser.STAR i);
  ("|", fun i -> Parser.VBAR i);
  (".", fun i -> Parser.DOT i);
  (";", fun i -> Parser.SEMI i);
  (",", fun i -> Parser.COMMA i);
  ("/", fun i -> Parser.SLASH i);
  (":", fun i -> Parser.COLON i);
  ("::", fun i -> Parser.COLONCOLON i);
  ("=", fun i -> Parser.EQ i);
  ("==", fun i -> Parser.EQEQ i);
  ("[", fun i -> Parser.LSQUARE i); 
  ("<", fun i -> Parser.LT i);
  ("{", fun i -> Parser.LCURLY i); 
  ("(", fun i -> Parser.LPAREN i); 
  ("<-", fun i -> Parser.LEFTARROW i); 
  ("{|", fun i -> Parser.LCURLYBAR i); 
  ("[|", fun i -> Parser.LSQUAREBAR i); 
  ("}", fun i -> Parser.RCURLY i);
  (")", fun i -> Parser.RPAREN i);
  ("]", fun i -> Parser.RSQUARE i);
  (">", fun i -> Parser.GT i);
  ("|}", fun i -> Parser.BARRCURLY i);
  ("|>", fun i -> Parser.BARGT i);
  ("|]", fun i -> Parser.BARRSQUARE i);

  (* Special compound symbols: *)
  (* Symbols made by joining several of the symbols above *)
  (":=", fun i -> Parser.COLONEQ i);
  ("->", fun i -> Parser.ARROW i);
  ("=>", fun i -> Parser.DARROW i);
  ("==>", fun i -> Parser.DDARROW i);
]

(* Support functions *)
(* Assorted helpers used by the lexer rules *)

(* A token constructor: given position info, build the parser token. *)
type buildfun = info -> Parser.token

(* Lookup table from lexeme to token constructor. *)
let symbolTable : (string, buildfun) Hashtbl.t = Hashtbl.create 1024

(* Register every reserved word once, when the module is initialised. *)
let () =
  let register (word, build) = Hashtbl.add symbolTable word build in
  List.iter register reservedWords

(* [createID i str] builds the token for the lexeme [str] found at position
   [i].  Reserved words and symbols use the constructor registered in
   [symbolTable]; any other lexeme becomes an identifier token: [UCID] when
   its first character is an ASCII uppercase letter, [LCID] otherwise.

   Only a missing table entry selects the identifier path.  The lookup no
   longer goes through a catch-all exception handler, which would also have
   hidden unrelated exceptions. *)
let createID i str =
  match Hashtbl.find_opt symbolTable str with
  | Some build -> build i
  | None ->
      (match str.[0] with
       | 'A' .. 'Z' -> Parser.UCID {i=i;v=str}
       | _ -> Parser.LCID {i=i;v=str})

(* Mutable lexer state shared by the helpers and rules of this file. *)

(* Current line number; bumped by [newline]. *)
let lineno = ref 1

(* Nesting depth of the comment being skipped. *)
let depth = ref 0

(* Buffer offset recorded by [newline]; [info] subtracts it to get a column. *)
let start = ref 0

(* Name of the file being lexed. *)
let filename = ref ""

(* Position where the comment or string literal being read started. *)
let startLex = ref dummyinfo

(* [create inFile stream] resets the position state and returns a lexing
   buffer reading from [stream].  An implicit file name is stored relative to
   the current working directory; any other name is stored unchanged. *)
let create inFile stream =
  let resolved =
    if Filename.is_implicit inFile then Filename.concat (Sys.getcwd ()) inFile
    else inFile
  in
  filename := resolved;
  lineno := 1;
  start := 0;
  Lexing.from_channel stream

(* Record a line break: advance the line counter and remember the start
   offset of the current lexeme as the new reference for columns. *)
let newline lexbuf =
  incr lineno;
  start := Lexing.lexeme_start lexbuf

(* Position info (file, line, column) of the current lexeme. *)
let info lexbuf =
  let column = Lexing.lexeme_start lexbuf - !start in
  createInfo !filename !lineno column

(* Text of the current lexeme. *)
let text lexbuf = Lexing.lexeme lexbuf

(* Growable byte buffer used to accumulate the contents of a string
   literal; [stringEnd] is the number of bytes currently in use. *)
let stringBuffer = ref (Bytes.create 2048)
let stringEnd = ref 0

(* Forget the accumulated contents (the storage is kept). *)
let resetStr () = stringEnd := 0

(* Append [ch]; when the buffer is full its capacity is doubled first. *)
let addStr ch =
  let used = !stringEnd in
  if used = Bytes.length !stringBuffer then begin
    let grown = Bytes.create (used * 2) in
    Bytes.blit !stringBuffer 0 grown 0 used;
    stringBuffer := grown
  end;
  Bytes.set !stringBuffer used ch;
  stringEnd := used + 1

(* The accumulated contents as a fresh string. *)
let getStr () = Bytes.sub_string !stringBuffer 0 !stringEnd

(* [extractLineno yytext offset] parses the suffix of [yytext] that starts at
   [offset] as an integer (the line number of a line directive). *)
let extractLineno yytext offset =
  let digits_len = String.length yytext - offset in
  String.sub yytext offset digits_len |> int_of_string
}


(* The main body of the lexical analyzer *)
(* Lexical analysis: main returns the next parser token.  Its first case
   skips runs of blanks (space, tab, form feed). *)
rule main = parse
  [' ' '\009' '\012']+     { main lexbuf }

(* End of line, possibly after blanks: count the line and continue *)
| [' ' '\009' '\012']*"\n" { newline lexbuf; main lexbuf }

(* A comment terminator outside of any comment is an error *)
| "*/" { error (info lexbuf) "Unmatched end of comment" }

(* Start of a comment: remember where it began, skip it, then continue *)
| "/*" { depth := 1; startLex := info lexbuf; comment lexbuf; main lexbuf }

(* Line directive in its short form: set the line counter from the number,
   then read the file name with getFile *)
| "# " ['0'-'9']+
    { lineno := extractLineno (text lexbuf) 2 - 1; getFile lexbuf }

(* Line directive in its long form, handled the same way *)
| "# line " ['0'-'9']+
    { lineno := extractLineno (text lexbuf) 7 - 1; getFile lexbuf }

(* Integer literal *)
| ['0'-'9']+
    { Parser.INTV{i=info lexbuf; v=int_of_string (text lexbuf)} }

(* Floating-point literal *)
| ['0'-'9']+ '.' ['0'-'9']+
    { Parser.FLOATV{i=info lexbuf; v=float_of_string (text lexbuf)} }

(* Identifier or keyword: createID decides which token to build *)
| ['A'-'Z' 'a'-'z' '_']
  ['A'-'Z' 'a'-'z' '_' '0'-'9' '\'']*
    { createID (info lexbuf) (text lexbuf) }

(* Fixed multi-character symbols *)
| ":=" | "<:" | "<-" | "->" | "=>" | "==>"
| "{|" | "|}" | "<|" | "|>" | "[|" | "|]" | "=="
    { createID (info lexbuf) (text lexbuf) }

(* Runs of operator characters *)
| ['~' '%' '\\' '+' '-' '&' '|' ':' '@' '`' '$']+
    { createID (info lexbuf) (text lexbuf) }

(* Single-character symbols *)
| ['*' '#' '/' '!' '?' '^' '(' ')' '{' '}' '[' ']' '<' '>' '.' ';' '_' ','
   '=' '\'']
    { createID (info lexbuf) (text lexbuf) }

(* Start of a string literal: reset the buffer and hand over to string *)
| "\"" { resetStr(); startLex := info lexbuf; string lexbuf }

| eof { Parser.EOF(info lexbuf) }

(* Anything else is rejected *)
| _  { error (info lexbuf) "Illegal character" }

(* Comments: skips a possibly nested comment.  depth counts the open
   comment markers; the rule returns once it drops back to zero, and an end
   of file inside a comment is reported at the position where the outermost
   comment started. *)
and comment = parse
  "/*"
    { depth := succ !depth; comment lexbuf }
| "*/"
    { depth := pred !depth; if !depth > 0 then comment lexbuf }
| eof
    { error (!startLex) "Comment not terminated" }
| [^ '\n']
    { comment lexbuf }
| "\n"
    { newline lexbuf; comment lexbuf }

(* getFile, getName and finishName together read the quoted file name of a
   line directive: getFile skips blanks up to the opening quote, getName
   stores the name in filename, and finishName consumes the closing quote
   and the rest of the line before returning to main. *)
and getFile = parse
  " "* "\"" { getName lexbuf }

and getName = parse
  [^ '"' '\n']+ { filename := (text lexbuf); finishName lexbuf }

and finishName = parse
  '"' [^ '\n']* { main lexbuf }

(* String literals: characters are accumulated with addStr until the closing
   quote, where a STRINGV token located at the opening quote is returned.
   A backslash delegates to escaped; a raw line break is kept in the string
   and counted as a new line; an end of file inside the literal is an error. *)
and string = parse
  '"'  { Parser.STRINGV {i = !startLex; v=getStr()} }
| '\\' { addStr(escaped lexbuf); string lexbuf }
| '\n' { addStr '\n'; newline lexbuf; string lexbuf }
| eof  { error (!startLex) "String not terminated" }
| _    { addStr (Lexing.lexeme_char lexbuf 0); string lexbuf }

(* Escape sequences inside string literals: returns the character denoted by
   the text that follows a backslash.  Three decimal digits give a character
   code, which must not exceed 255; any unrecognised escape is an error. *)
and escaped = parse
  'n'	 { '\n' }
| 't'	 { '\t' }
| '\\'	 { '\\' }
| '"'    { '\034'  }
| '\''	 { '\'' }
| ['0'-'9']['0'-'9']['0'-'9']
    {
      let x = int_of_string(text lexbuf) in
      if x > 255 then
	error (info lexbuf) "Illegal character constant"
      else
	Char.chr x
    }
| [^ '"' '\\' 't' 'n' '\'']
    { error (info lexbuf) "Illegal character constant" }

(*  *)

ocamlyacc의 문서는 다음과 같다.

%{
  header
%}
  declarations
%%
  rules
%%
  trailer

이 parser.mly에는 trailer가 없다. header에서는 필요한 모듈을 open하고, declarations에 해당하는 부분에서 토큰을 선언하며, rules에 해당하는 부분에 파싱 규칙을 쓴다. 자구 해석과 마찬가지로 rules에서는 패턴을 매칭하고 { } 안에 그때 실행할 처리를 쓴다.
parser.mly

/*  
 *  Yacc grammar for the parser.  The files parser.mli and parser.ml
 *  are generated automatically from parser.mly.
 */

%{
open Support.Error
open Support.Pervasive
open Syntax
%}

/* ---------------------------------------------------------------------- */
/* Preliminaries */

/* We first list all the tokens mentioned in the parsing rules
   below.  The names of the tokens are common to the parser and the
   generated lexical analyzer.  Each token is annotated with the type
   of data that it carries; normally, this is just file information
   (which is used by the parser to annotate the abstract syntax trees
   that it constructs), but sometimes -- in the case of identifiers and
   constant values -- more information is provided.
 */

/* Keyword tokens: each one carries only the position where it was read */
%token <Support.Error.info> IMPORT
%token <Support.Error.info> IF
%token <Support.Error.info> THEN
%token <Support.Error.info> ELSE
%token <Support.Error.info> TRUE
%token <Support.Error.info> FALSE
%token <Support.Error.info> SUCC
%token <Support.Error.info> PRED
%token <Support.Error.info> ISZERO

/* Identifier and constant value tokens */
/* Used for identifiers and constants: a value together with its position */
%token <string Support.Error.withinfo> UCID  /* uppercase-initial */
%token <string Support.Error.withinfo> LCID  /* lowercase/symbolic-initial */
%token <int Support.Error.withinfo> INTV
%token <float Support.Error.withinfo> FLOATV
%token <string Support.Error.withinfo> STRINGV

/* Symbolic tokens */
/* Used for symbols: each one carries only its position */
%token <Support.Error.info> APOSTROPHE
%token <Support.Error.info> DQUOTE
%token <Support.Error.info> ARROW
%token <Support.Error.info> BANG
%token <Support.Error.info> BARGT
%token <Support.Error.info> BARRCURLY
%token <Support.Error.info> BARRSQUARE
%token <Support.Error.info> COLON
%token <Support.Error.info> COLONCOLON
%token <Support.Error.info> COLONEQ
%token <Support.Error.info> COLONHASH
%token <Support.Error.info> COMMA
%token <Support.Error.info> DARROW
%token <Support.Error.info> DDARROW
%token <Support.Error.info> DOT
%token <Support.Error.info> EOF
%token <Support.Error.info> EQ
%token <Support.Error.info> EQEQ
%token <Support.Error.info> EXISTS
%token <Support.Error.info> GT
%token <Support.Error.info> HASH
%token <Support.Error.info> LCURLY
%token <Support.Error.info> LCURLYBAR
%token <Support.Error.info> LEFTARROW
%token <Support.Error.info> LPAREN
%token <Support.Error.info> LSQUARE
%token <Support.Error.info> LSQUAREBAR
%token <Support.Error.info> LT
%token <Support.Error.info> RCURLY
%token <Support.Error.info> RPAREN
%token <Support.Error.info> RSQUARE
%token <Support.Error.info> SEMI
%token <Support.Error.info> SLASH
%token <Support.Error.info> STAR
%token <Support.Error.info> TRIANGLE
%token <Support.Error.info> USCORE
%token <Support.Error.info> VBAR

/* ---------------------------------------------------------------------- */
/* The starting production of the generated parser is the syntactic class
   toplevel.  The type that is returned when a toplevel is recognized is
   Syntax.command list.
*/

%start toplevel
%type < Syntax.command list > toplevel
%%

/* ---------------------------------------------------------------------- */
/* Main body of the parser definition */

/* The top level of a file is a sequence of commands, each terminated
   by a semicolon. */
/* Start symbol: parsing begins here and yields the list of commands in
   source order; the end of file yields the empty list. */
toplevel :
    EOF
      { [] }
  | Command SEMI toplevel
      { let cmd = $1 in
          let cmds = $3 in
          cmd::cmds }

/* A top-level command */
/* Either an import of the file named by a string literal, or a term to
   evaluate, tagged with the position returned by tmInfo. */
Command :
    IMPORT STRINGV { (Import($2.v)) }
  | Term 
      { (let t = $1 in Eval(tmInfo t,t)) }

/* A term is either an AppTerm or a conditional; the conditional is
   annotated with the position of its IF keyword. */
Term :
    AppTerm
      { $1 }
  | IF Term THEN Term ELSE Term
      { TmIf($1, $2, $4, $6) }

/* An AppTerm is an atomic term, or one of succ, pred and iszero applied to
   an atomic term; the result is annotated with the keyword position. */
AppTerm :
    ATerm
      { $1 }
  | SUCC ATerm
      { TmSucc($1, $2) }
  | PRED ATerm
      { TmPred($1, $2) }
  | ISZERO ATerm
      { TmIsZero($1, $2) }

/* Atomic terms are ones that never require extra parentheses */
/* Terminal forms: a parenthesised term, a boolean constant, or an integer
   literal.  A literal n is expanded into n applications of TmSucc around
   TmZero, all annotated with the position of the literal. */
ATerm :
    LPAREN Term RPAREN  
      { $2 } 
  | TRUE
      { TmTrue($1) }
  | FALSE
      { TmFalse($1) }
  | INTV
      { let rec f n = match n with
              0 -> TmZero($1.i)
            | n -> TmSucc($1.i, f (n-1))
          in f $1.v }


/*   */

syntax.ml syntax.mli

이 파일은 코드 그대로이므로 따로 설명할 것이 없다.
syntax.mli

(* module Syntax: syntax trees and associated support functions *)

open Support.Pervasive
open Support.Error

(* Data type definitions *)

(* Abstract syntax of terms.  Every constructor carries an [info]; the parser
   stores there the position of the token that introduced the term. *)
type term =
  | TmTrue of info (* boolean constant true *)
  | TmFalse of info (* boolean constant false *)
  | TmIf of info * term * term * term (* condition, then branch, else branch *)
  | TmZero of info (* the number zero *)
  | TmSucc of info * term (* succ applied to a term *)
  | TmPred of info * term (* pred applied to a term *)
  | TmIsZero of info * term (* iszero applied to a term *)

(* A top-level command: import the named file, or evaluate a term. *)
type command = Import of string | Eval of info * term

(* Printing *)
(* NOTE(review): the implementations live in syntax.ml, which is not shown
   here; presumably [printtm] prints a whole term. *)
val printtm : term -> unit

(* main.ml calls this with [true] to print an evaluation result.
   NOTE(review): the meaning of the boolean flag is not visible from this
   interface - confirm against syntax.ml. *)
val printtm_ATerm : bool -> term -> unit

(* Misc *)
(* Presumably returns the [info] stored in the outermost constructor of the
   term - confirm against syntax.ml.  The parser uses it to tag Eval. *)
val tmInfo : term -> info

core.ml core.mli

모든 term을 평가한 결과는 term이 된다.
core.mli

(* module Core

   Core typechecking and evaluation functions
*)

open Syntax
open Support.Error

(* Evaluates a term and returns the resulting term.  The evaluation rules
   themselves are defined in core.ml, which is not shown here. *)
val eval : term -> term

main.ml

지금까지의 모듈을 조합해 전체 처리를 실행한다. 이로써 책에서 다루는 언어가 동작한다.
main.ml

(* Module Main: The main program.  Deals with processing the command
   line, reading files, building and connecting lexers and parsers, etc.

   For most experiments with the implementation, it should not be
   necessary to change this file.
*)

open Format
open Support.Pervasive
open Support.Error
open Syntax
open Core

(* Directories tried, in order, when opening an input file; the empty string
   stands for using the file name unchanged. *)
let searchpath = ref [ "" ]

(* Command-line options: -I puts a directory at the front of [searchpath]. *)
let argDefs =
  let add_dir dir = searchpath := dir :: !searchpath in
  [ ("-I", Arg.String add_dir, "Append a directory to the search path") ]

(* Parse the command line and return the single input file name.  Fails
   through [err] when no file, or more than one file, is given. *)
let parseArgs () =
  let chosen : string option ref = ref None in
  let on_anonymous arg =
    match !chosen with
    | None -> chosen := Some arg
    | Some _ -> err "You must specify exactly one input file"
  in
  Arg.parse argDefs on_anonymous "";
  match !chosen with
  | Some path -> path
  | None -> err "You must specify an input file"

(* Open [infile] for reading, trying each directory of [searchpath] in turn;
   a directory that fails with [Sys_error] is skipped.  Fails through [err]
   when no directory works. *)
let openfile infile =
  let candidate dir = if dir = "" then infile else dir ^ "/" ^ infile in
  let rec attempt = function
    | [] -> err ("Could not find " ^ infile)
    | dir :: others -> (
        match open_in (candidate dir) with
        | channel -> channel
        | exception Sys_error _ -> attempt others)
  in
  attempt !searchpath

(* [parseFile inFile] opens [inFile] through [openfile], runs the lexer and
   parser over it and returns the parsed command list.  A syntax error is
   reported through [error] at the current lexer position.

   The cleanup (clearing the parser state and closing the channel) runs in a
   [finally] handler, so it also happens when lexing or parsing fails;
   previously the channel stayed open on every error path. *)
let parseFile inFile =
  let pi = openfile inFile in
  Fun.protect
    ~finally:(fun () ->
      Parsing.clear_parser ();
      (* The non-raising close keeps the finaliser from masking the
         exception that is already propagating. *)
      close_in_noerr pi)
    (fun () ->
      let lexbuf = Lexer.create inFile pi in
      try Parser.toplevel Lexer.main lexbuf
      with Parsing.Parse_error -> error (Lexer.info lexbuf) "Parse error")

(* Files processed so far; used to process each file at most once. *)
let alreadyImported : string list ref = ref []

(* Parse [f] and run its commands in order, unless [f] was already
   processed.  Each command runs inside its own hv box and the formatter is
   flushed after it. *)
let rec process_file f =
  if not (List.mem f !alreadyImported) then begin
    alreadyImported := f :: !alreadyImported;
    parseFile f
    |> List.iter (fun cmd ->
           open_hvbox 0;
           process_command cmd;
           print_flush ())
  end

(* Run one command: an import processes the named file; an evaluation
   evaluates the term and prints the result followed by a line break. *)
and process_command = function
  | Import path -> process_file path
  | Eval (_, t) ->
      let value = eval t in
      printtm_ATerm true value;
      force_newline ()

(* Read the input file name from the command line and process that file. *)
let main () = process_file (parseArgs ())

(* Formatter limits used for all output of the program. *)
let () =
  set_max_boxes 1000;
  set_margin 67

(* Exit status of the run: 0 on success, or the status carried by an [Exit]
   exception raised while running [main]. *)
let res =
  let run () =
    match main () with
    | () -> 0
    | exception Exit status -> status
  in
  Printexc.catch run ()

(* Flush any pending formatter output, then terminate with that status. *)
let () =
  print_flush ();
  exit res