Skip to content

Potential Precedence Issue #16

@billdenney

Description

@billdenney

I'm still working on the ISO 8601 parser mentioned in #15.

I've made some good progress, but there is an issue with a rule that is unambiguous overall but initially matches in more than one way. In the example below, I expected the parser to do the following:

  1. Find digit4 (it does that correctly)
  2. Assign the digit4 to yearnum (it does that correctly)
  3. Find digit2 (it doesn't do that)
  4. Assign the digit2 to monthnum
  5. Find digit2
  6. Assign the digit2 to mdaynum

I'm not sure why it finds digit3 instead of digit2. It should only find digit3 if that is the end of the string. The code is below; the issue is with the first call to parser$parse().

library(rly)

# Token names declared for the lexer and parser; the lexer class defines a
# matching `t_DIGIT` and `t_DECIMALPOINT` rule for each.
TOKENS <- c("DIGIT", "DECIMALPOINT")
# Single-character literals declared for the lexer and parser. The grammar
# rules quote 'W', 'T', ':' and '-' directly. Each literal is listed once.
LITERALS <- c("W", "Z", "Q", "T", ":", "-")

#' Concatenate several values of a production into one string
#'
#' @param x Positions to read; each one is passed to `p$get()`.
#' @param p Object exposing a `get(index)` function.
#' @return A single character string: the values at `x`, in order, pasted
#'   together with no separator (`""` when `x` is empty).
p_collapse <- function(x, p) {
  # vapply() always yields a character vector and fails loudly when a position
  # holds no single value, where sapply() would silently change its return
  # type and let paste0() produce text such as "NULL".
  pieces <- vapply(
    X = x,
    FUN = function(idx) as.character(p$get(idx)),
    FUN.VALUE = character(1)
  )
  paste0(pieces, collapse = "")
}

#' Merge the named values found on a production's right-hand side
#'
#' Reads positions 2 through `p$length()` from `p` and combines their named
#' elements into one list. A name that occurs more than once must carry the
#' same value every time; otherwise both lists are printed and an error is
#' raised. Values without names contribute nothing.
#'
#' @param p Object exposing `length()` and `get(index)`.
#' @return A named list holding each distinct name once, in first-seen order.
set_value <- function(p) {
  merged <- list()
  positions <- 1 + seq_len(p$length() - 1)
  for (pos in positions) {
    item <- p$get(pos)
    for (key in names(item)) {
      # First occurrence of this name: record it and move on.
      if (!(key %in% names(merged))) {
        merged[[key]] <- item[[key]]
        next
      }
      # Repeated name: the two values have to agree.
      if (merged[[key]] != item[[key]]) {
        print(merged)
        print(item)
        stop(sprintf("mismatch with %s: %s vs %s", key, merged[[key]], item[[key]]))
      }
    }
  }
  merged
}

# Lexer ####

# Lexer definition later handed to rly::lex(): declares the token names, the
# literal characters, and one `t_` member per token or special rule.
Lexer <-
  R6::R6Class(
    "Lexer",
    public=list(
      tokens=TOKENS,
      literals=LITERALS,
      # DIGIT matches exactly one decimal digit, so multi-digit numbers are
      # assembled by the grammar rules (digit2, digit3, digit4), not the lexer.
      t_DIGIT="[0-9]",
      # DECIMALPOINT matches either '.' or ','.
      t_DECIMALPOINT="[\\.,]",
      #t_ignore = " \t",
      # Matches a run of newlines and advances the lexer's line counter by the
      # number of characters matched. Returns NULL (presumably so that no
      # token is emitted; confirm against the rly documentation).
      t_newline = function(re='\\n+', t) {
        t$lexer$lineno <- t$lexer$lineno + nchar(t$value)
        return(NULL)
      },
      # Reports the first character of the unmatched input, calls
      # t$lexer$skip(1) (presumably to advance past it), and returns `t`.
      # NOTE(review): the message is printed without a trailing newline.
      t_error = function(t) {
        cat(sprintf("Illegal character '%s'", t$value[1]))
        t$lexer$skip(1)
        return(t)
      }
    )
  )

# General parser support functions ####

# Members shared by the parser: token/literal declarations, digit-grouping
# rules, the empty `basic` marker rule, and the syntax-error handler.
# Each p_* rule carries its grammar production in the `doc` default, stores
# its result at position 1, and reads the production's symbols from
# position 2 onward.
l_parser_general <-
  list(
    tokens=TOKENS,
    literals=LITERALS,

    ## Helpers ####
    p_fraction=function(doc="fraction : DECIMALPOINT multi_digit", p) {
      # Position 2 is the decimal point; only the digits at position 3 are kept.
      message("fraction")
      digits <- p$get(3)
      p$set(1, list(fraction=digits))
    },
    p_multi_digit=function(doc="multi_digit : DIGIT
                                            | digit2
                                            | digit3
                                            | digit4", p) {
      # One to four digits; the matched value is passed through unchanged.
      message("multi_digit")
      matched <- p$get(2)
      p$set(1, matched)
    },
    p_digit4=function(doc="digit4 : digit3 DIGIT", p) {
      # Three digits plus one more, joined into a single string.
      message("digit4")
      joined <- p_collapse(2:3, p)
      p$set(1, joined)
    },
    p_digit3=function(doc="digit3 : digit2 DIGIT", p) {
      # Two digits plus one more, joined into a single string.
      message("digit3")
      joined <- p_collapse(2:3, p)
      p$set(1, joined)
    },
    p_digit2=function(doc="digit2 : DIGIT DIGIT", p) {
      # Two single digits joined into a single string.
      message("digit2")
      joined <- p_collapse(2:3, p)
      p$set(1, joined)
    },
    p_basic=function(doc="basic : ", p) {
      # Empty production: consumes nothing and only tags the format as basic.
      format_tag <- list(iso_8601_format="basic")
      p$set(1, format_tag)
    },
    p_error = function(p) {
      # A NULL argument is reported as an error at end of input; otherwise the
      # offending value is shown, followed by the input and a caret line.
      report <- if (is.null(p)) {
        "Syntax error at EOF"
      } else {
        sprintf(
          "Syntax error at '%s'\n%s\n%s^",
          p$value, p$lexer$lexdata, strrep(' ', p$lexpos - 1)
        )
      }
      cat(report)
    }
  )

# Specific numbers ####

# Rules that give a fixed-width digit group a meaning. Each rule announces its
# name with message(), reads the digit string at position 2, and stores it at
# position 1 as a one-element named list.
l_specific_numbers <-
  list(
    p_yearnum=function(doc="yearnum : digit4", p) {
      message("yearnum")
      digits <- p$get(2)
      p$set(1, list(year=digits))
    },
    p_monthnum=function(doc="monthnum : digit2", p) {
      message("monthnum")
      digits <- p$get(2)
      p$set(1, list(month=digits))
    },
    p_mdaynum=function(doc="mdaynum : digit2", p) {
      message("mdaynum")
      digits <- p$get(2)
      p$set(1, list(mday=digits))
    },
    p_weeknum=function(doc="weeknum : digit2", p) {
      message("weeknum")
      digits <- p$get(2)
      p$set(1, list(week=digits))
    },
    p_weekdaynum=function(doc="weekdaynum : DIGIT", p) {
      message("weekdaynum")
      digit <- p$get(2)
      p$set(1, list(weekday=digit))
    },
    p_odaynum=function(doc="odaynum : digit3", p) {
      message("odaynum")
      digits <- p$get(2)
      p$set(1, list(oday=digits))
    },
    p_hournum=function(doc="hournum : digit2", p) {
      message("hournum")
      digits <- p$get(2)
      p$set(1, list(hour=digits))
    },
    p_minutenum=function(doc="minutenum : digit2", p) {
      message("minutenum")
      digits <- p$get(2)
      p$set(1, list(minute=digits))
    },
    p_secondnum=function(doc="secondnum : digit2", p) {
      message("secondnum")
      digits <- p$get(2)
      p$set(1, list(second=digits))
    }
  )

# Extended Parser ####

# Structural grammar rules, listed with `date` first. Most rules announce
# their name with message() and store at position 1 the merged named values
# of their right-hand side (via set_value()). The marker rules (week_w,
# time_t, dash, colon) store a fixed list instead.
l_extended_iso8601 <-
  list(
    p_date=function(doc="date : year", p) {
      message("date")
      merged <- set_value(p)
      p$set(1, merged)
    },
    p_year=function(doc="year : yearnum
                              | yearnum fraction
                              | yearnum basic subyear
                              | yearnum dash subyear", p) {
      message("year")
      merged <- set_value(p)
      p$set(1, merged)
    },
    p_subyear=function(doc="subyear : month
                                    | week
                                    | oday", p) {
      message("subyear")
      merged <- set_value(p)
      p$set(1, merged)
    },
    p_month=function(doc="month : monthnum
                                | monthnum fraction
                                | monthnum basic mday
                                | monthnum dash mday", p) {
      message("month")
      merged <- set_value(p)
      p$set(1, merged)
    },
    p_mday=function(doc="mday : mdaynum
                              | mdaynum fraction
                              | mdaynum subday", p) {
      message("mday")
      merged <- set_value(p)
      p$set(1, merged)
    },
    p_week=function(doc="week : week_w weeknum
                              | week_w weeknum fraction
                              | week_w weeknum basic weekday
                              | week_w weeknum dash weekday", p) {
      message("week")
      merged <- set_value(p)
      p$set(1, merged)
    },
    p_week_w=function(doc="week_w : 'W'", p) {
      # The 'W' marker carries no value of its own.
      message("week_w")
      nothing <- list()
      p$set(1, nothing)
    },
    p_weekday=function(doc="weekday : weekdaynum
                                    | weekdaynum fraction
                                    | weekdaynum subday", p) {
      message("weekday")
      merged <- set_value(p)
      p$set(1, merged)
    },
    p_oday=function(doc="oday : odaynum
                              | odaynum fraction
                              | odaynum subday", p) {
      message("oday")
      merged <- set_value(p)
      p$set(1, merged)
    },
    p_subday=function(doc="subday : time_with_t", p) {
      message("subday")
      merged <- set_value(p)
      p$set(1, merged)
    },
    p_time=function(doc="time : time_with_t
                              | time_without_t", p) {
      # An hour given on its own must be preceded by 'T'.
      message("time")
      merged <- set_value(p)
      p$set(1, merged)
    },
    p_time_with_t=function(doc="time_with_t : time_t hournum
                                            | time_t hournum fraction
                                            | time_t time_without_t", p) {
      message("time_with_t")
      merged <- set_value(p)
      p$set(1, merged)
    },
    p_time_t=function(doc="time_t : 'T'", p) {
      # The 'T' marker carries no value of its own.
      message("time_t")
      nothing <- list()
      p$set(1, nothing)
    },
    p_time_without_t=function(doc="time_without_t : hournum basic minute
                                                  | hournum colon minute", p) {
      message("time_without_t")
      merged <- set_value(p)
      p$set(1, merged)
    },
    p_minute=function(doc="minute : minutenum
                                  | minutenum fraction
                                  | minutenum basic second
                                  | minutenum colon second", p) {
      message("minute")
      merged <- set_value(p)
      p$set(1, merged)
    },
    p_second=function(doc="second : secondnum
                                  | secondnum fraction", p) {
      message("second")
      merged <- set_value(p)
      p$set(1, merged)
    },
    p_dash=function(doc="dash : '-'", p) {
      # A '-' separator tags the format as extended.
      message("dash")
      format_tag <- list(iso_8601_format="extended")
      p$set(1, format_tag)
    },
    p_colon=function(doc="colon : ':'", p) {
      # A ':' separator tags the format as extended.
      message("colon")
      format_tag <- list(iso_8601_format="extended")
      p$set(1, format_tag)
    }
  )

# Parser definition later handed to rly::yacc(). Its public members are the
# three rule lists joined in order: the structural rules first, then the
# specific-number rules, then the shared helpers and error handler.
Parser <-
  R6::R6Class(
    classname = "Basic Parser",
    public = c(l_extended_iso8601, l_specific_numbers, l_parser_general)
  )

# Build the lexer and the parser objects from the Lexer and Parser class
# definitions. The yacc() call emits the warnings recorded below about the
# unused `time` rule.
lexer  <- rly::lex(Lexer)
parser <- rly::yacc(Parser)
#> WARN [2021-11-12 13:45:33] Rule time defined, but not used
#> WARN [2021-11-12 13:45:33] There is 1 unused rule
#> WARN [2021-11-12 13:45:33] Symbol time is unreachable

# Failing case: year, month and day written without separators. The trace
# below shows digit3 being reduced after yearnum, then a syntax error at the
# last character.
parser$parse("20201101", lexer)
#> digit2
#> digit3
#> digit4
#> yearnum
#> digit2
#> digit3
#> Syntax error at '1'
#> 20201101
#>        ^
#> NULL
# Four-digit year followed by three digits, no separator: the output below
# shows it parsed as year + oday with the "basic" format tag.
parser$parse("2020110", lexer)
#> digit2
#> digit3
#> digit4
#> yearnum
#> digit2
#> digit3
#> odaynum
#> oday
#> subyear
#> year
#> date
#> $year
#> [1] "2020"
#> 
#> $iso_8601_format
#> [1] "basic"
#> 
#> $oday
#> [1] "110"
# Same date with '-' separators: the output below shows it parsed as
# year + month + mday with the "extended" format tag.
parser$parse("2020-11-01", lexer)
#> digit2
#> digit3
#> digit4
#> yearnum
#> dash
#> digit2
#> monthnum
#> dash
#> digit2
#> mdaynum
#> mday
#> month
#> subyear
#> year
#> date
#> $year
#> [1] "2020"
#> 
#> $iso_8601_format
#> [1] "extended"
#> 
#> $month
#> [1] "11"
#> 
#> $mday
#> [1] "01"
# Year, '-' separator, then three digits: the output below shows it parsed as
# year + oday with the "extended" format tag.
parser$parse("2020-110", lexer)
#> digit2
#> digit3
#> digit4
#> yearnum
#> dash
#> digit2
#> digit3
#> odaynum
#> oday
#> subyear
#> year
#> date
#> $year
#> [1] "2020"
#> 
#> $iso_8601_format
#> [1] "extended"
#> 
#> $oday
#> [1] "110"

Created on 2021-11-12 by the reprex package (v2.0.1)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions