diff --git a/html2text.go b/html2text.go
index 8fe9000..05aeeb3 100644
--- a/html2text.go
+++ b/html2text.go
@@ -21,48 +21,6 @@ type Options struct {
 	TextOnly            bool                 // Returns only plain text
 }
 
-// PrettyTablesOptions overrides tablewriter behaviors
-type PrettyTablesOptions struct {
-	AutoFormatHeader     bool
-	AutoWrapText         bool
-	ReflowDuringAutoWrap bool
-	ColWidth             int
-	ColumnSeparator      string
-	RowSeparator         string
-	CenterSeparator      string
-	HeaderAlignment      int
-	FooterAlignment      int
-	Alignment            int
-	ColumnAlignment      []int
-	NewLine              string
-	HeaderLine           bool
-	RowLine              bool
-	AutoMergeCells       bool
-	Borders              tablewriter.Border
-}
-
-// NewPrettyTablesOptions creates PrettyTablesOptions with default settings
-func NewPrettyTablesOptions() *PrettyTablesOptions {
-	return &PrettyTablesOptions{
-		AutoFormatHeader:     true,
-		AutoWrapText:         true,
-		ReflowDuringAutoWrap: true,
-		ColWidth:             tablewriter.MAX_ROW_WIDTH,
-		ColumnSeparator:      tablewriter.COLUMN,
-		RowSeparator:         tablewriter.ROW,
-		CenterSeparator:      tablewriter.CENTER,
-		HeaderAlignment:      tablewriter.ALIGN_DEFAULT,
-		FooterAlignment:      tablewriter.ALIGN_DEFAULT,
-		Alignment:            tablewriter.ALIGN_DEFAULT,
-		ColumnAlignment:      []int{},
-		NewLine:              tablewriter.NEWLINE,
-		HeaderLine:           true,
-		RowLine:              false,
-		AutoMergeCells:       false,
-		Borders:              tablewriter.Border{Left: true, Right: true, Bottom: true, Top: true},
-	}
-}
-
 // FromHTMLNode renders text output from a pre-parsed HTML document.
 func FromHTMLNode(doc *html.Node, o ...Options) (string, error) {
 	var options Options
@@ -70,6 +28,11 @@ func FromHTMLNode(doc *html.Node, o ...Options) (string, error) {
 		options = o[0]
 	}
 
+	if options.PrettyTables && options.PrettyTablesOptions == nil {
+		// Defaults must be set explicitly: they are no longer identical to tablewriter's own.
+		options.PrettyTablesOptions = NewPrettyTablesOptions()
+	}
+
 	ctx := textifyTraverseContext{
 		buf:     bytes.Buffer{},
 		options: options,
@@ -333,31 +296,18 @@ func (ctx *textifyTraverseContext) handleTableElement(node *html.Node) error {
 
 		buf := &bytes.Buffer{}
 		table := tablewriter.NewWriter(buf)
-		if ctx.options.PrettyTablesOptions != nil {
-			options := ctx.options.PrettyTablesOptions
-			table.SetAutoFormatHeaders(options.AutoFormatHeader)
-			table.SetAutoWrapText(options.AutoWrapText)
-			table.SetReflowDuringAutoWrap(options.ReflowDuringAutoWrap)
-			table.SetColWidth(options.ColWidth)
-			table.SetColumnSeparator(options.ColumnSeparator)
-			table.SetRowSeparator(options.RowSeparator)
-			table.SetCenterSeparator(options.CenterSeparator)
-			table.SetHeaderAlignment(options.HeaderAlignment)
-			table.SetFooterAlignment(options.FooterAlignment)
-			table.SetAlignment(options.Alignment)
-			table.SetColumnAlignment(options.ColumnAlignment)
-			table.SetNewLine(options.NewLine)
-			table.SetHeaderLine(options.HeaderLine)
-			table.SetRowLine(options.RowLine)
-			table.SetAutoMergeCells(options.AutoMergeCells)
-			table.SetBorders(options.Borders)
-		}
-		table.SetHeader(ctx.tableCtx.header)
-		table.SetFooter(ctx.tableCtx.footer)
-		table.AppendBulk(ctx.tableCtx.body)
+		ctx.options.PrettyTablesOptions.configureTable(table)
+
+		table.Header(ctx.tableCtx.header)
+		table.Footer(ctx.tableCtx.footer)
+		if err := table.Bulk(ctx.tableCtx.body); err != nil {
+			return err
+		}
 
 		// Render the table using ASCII.
-		table.Render()
+		if err := table.Render(); err != nil {
+			return err
+		}
 		if err := ctx.emit(buf.String()); err != nil {
 			return err
 		}
diff --git a/html2text_test.go b/html2text_test.go
index 452b45e..cb418a2 100644
--- a/html2text_test.go
+++ b/html2text_test.go
@@ -9,6 +9,9 @@ import (
 	"regexp"
 	"strings"
 	"testing"
+
+	"github.com/olekukonko/tablewriter"
+	"github.com/olekukonko/tablewriter/tw"
 )
 
 const destPath = "testdata"
@@ -353,6 +356,54 @@ Table 2 Header 1 Table 2 Header 2 Table 2 Footer 1 Table 2 Footer 2 Table 2 Row
 	}
 }
 
+func TestTablesWithDirectConfiguration(t *testing.T) {
+	testCases := []struct {
+		input         string
+		tabularOutput string
+	}{
+		{
+			`<table>
+	<thead>
+		<tr><th>Header 1</th><th>Header 2</th></tr>
+	</thead>
+	<tfoot>
+		<tr><td>Footer 1</td><td>Footer 2</td></tr>
+	</tfoot>
+	<tbody>
+		<tr><td>Row 1 Col 1</td><td>Row 1 Col 2</td></tr>
+		<tr><td>Row 2 Col 1</td><td>Row 2 Col 2</td></tr>
+	</tbody>
+</table>`,
+			`┌─────────────┬─────────────┐
+│    HEADER 1 │    HEADER 2 │
+├─────────────┼─────────────┤
+│ Row 1 Col 1 │ Row 1 Col 2 │
+│ Row 2 Col 1 │ Row 2 Col 2 │
+└─────────────┴─────────────┘`,
+		},
+	}
+
+	for _, testCase := range testCases {
+		options := Options{
+			PrettyTables: true,
+			PrettyTablesOptions: &PrettyTablesOptions{
+				Configuration: func(table *tablewriter.Table) {
+					table.Options(
+						tablewriter.WithHeaderAlignment(tw.AlignRight),
+						tablewriter.WithFooterControl(tw.Control{Hide: tw.On}),
+					)
+				},
+			},
+		}
+		// Check pretty tabular ASCII version.
+		if msg, err := wantString(testCase.input, testCase.tabularOutput, options); err != nil {
+			t.Error(err)
+		} else if len(msg) > 0 {
+			t.Log(msg)
+		}
+	}
+}
+
 func TestStrippingLists(t *testing.T) {
 	testCases := []struct {
 		input  string
diff --git a/prettytables.go b/prettytables.go
new file mode 100644
index 0000000..273483d
--- /dev/null
+++ b/prettytables.go
@@ -0,0 +1,243 @@
+package html2text
+
+import (
+	"github.com/olekukonko/tablewriter"
+	"github.com/olekukonko/tablewriter/tw"
+)
+
+// PrettyTablesOptions overrides tablewriter behaviors.
+type PrettyTablesOptions struct {
+	AutoFormatHeader bool
+	AutoWrapText     bool
+	// ReflowDuringAutoWrap is not read when configuring the table.
+	//
+	// Deprecated: tablewriter always assumes this to be `true`.
+	ReflowDuringAutoWrap bool
+	ColWidth             int
+	ColumnSeparator      string
+	RowSeparator         string
+	CenterSeparator      string
+	HeaderAlignment      tw.Align
+	FooterAlignment      tw.Align
+	Alignment            tw.Align
+	ColumnAlignment      tw.Alignment
+	// NewLine is not read when configuring the table.
+	//
+	// Deprecated: tablewriter always assumes this to be `\n`.
+	NewLine        string
+	HeaderLine     bool
+	RowLine        bool
+	AutoMergeCells bool
+	Borders        Border
+	// Configuration, when set, is called to configure the `Table` directly with everything
+	// [tablewriter] offers. All other settings of this struct are then ignored.
+	Configuration func(table *tablewriter.Table)
+}
+
+// NewPrettyTablesOptions creates PrettyTablesOptions with default settings.
+func NewPrettyTablesOptions() *PrettyTablesOptions {
+	return &PrettyTablesOptions{
+		AutoFormatHeader: true,
+		AutoWrapText:     true,
+		ColWidth:         32, // old tablewriter.MAX_ROW_WIDTH + borders now count into width
+		ColumnSeparator:  defaultBorderStyle.ColumnSeparator,
+		RowSeparator:     defaultBorderStyle.RowSeparator,
+		CenterSeparator:  defaultBorderStyle.CenterSeparator,
+		HeaderAlignment:  tw.AlignCenter,
+		FooterAlignment:  tw.AlignCenter,
+		Alignment:        tw.AlignDefault,
+		ColumnAlignment:  make(tw.Alignment, 0),
+		HeaderLine:       true,
+		RowLine:          false,
+		AutoMergeCells:   false,
+		Borders:          Border{Left: true, Right: true, Bottom: true, Top: true},
+	}
+}
+
+// configureTable applies these options to table. A nil receiver falls back to
+// NewPrettyTablesOptions. When Configuration is set, it alone configures the table.
+func (p *PrettyTablesOptions) configureTable(table *tablewriter.Table) {
+	if p == nil {
+		// Callers no longer nil-check the options, so never dereference a nil receiver.
+		p = NewPrettyTablesOptions()
+	}
+	if p.Configuration != nil {
+		p.Configuration(table)
+		return
+	}
+
+	cfg := tablewriter.NewConfigBuilder()
+
+	cfg.WithHeaderAutoFormat(asState(p.AutoFormatHeader)).WithFooterAutoFormat(asState(p.AutoFormatHeader)).
+		WithRowAutoWrap(p.wrapMode()).WithHeaderAutoWrap(p.wrapMode()).WithFooterAutoWrap(p.wrapMode()).
+		WithRowMaxWidth(p.ColWidth).
+		WithHeaderAlignment(p.HeaderAlignment).
+		WithFooterAlignment(p.FooterAlignment).
+		WithRowAlignment(p.Alignment).
+		WithRowMergeMode(p.mergeMode())
+
+	if len(p.ColumnAlignment) > 0 {
+		cfg.Row().Alignment().WithPerColumn(p.ColumnAlignment)
+	}
+
+	rendition := tw.Rendition{
+		Borders:  p.Borders.withStates(),
+		Symbols:  p.borderStyle(),
+		Settings: p.renderSettings(),
+	}
+
+	table.Options(
+		tablewriter.WithConfig(cfg.Build()),
+		tablewriter.WithRendition(rendition))
+}
+
+// wrapMode returns tw.WrapNormal when AutoWrapText is set and tw.WrapNone otherwise.
+func (p *PrettyTablesOptions) wrapMode() int {
+	if p.AutoWrapText {
+		return tw.WrapNormal
+	}
+	return tw.WrapNone
+}
+
+// mergeMode returns tw.MergeVertical when AutoMergeCells is set and tw.MergeNone otherwise.
+func (p *PrettyTablesOptions) mergeMode() int {
+	if p.AutoMergeCells {
+		return tw.MergeVertical
+	}
+	return tw.MergeNone
+}
+
+// renderSettings maps HeaderLine and RowLine onto tw.Settings.
+func (p *PrettyTablesOptions) renderSettings() tw.Settings {
+	return tw.Settings{
+		Lines: tw.Lines{
+			ShowHeaderLine: asState(p.HeaderLine),
+		},
+		Separators: tw.Separators{
+			BetweenRows: asState(p.RowLine),
+		},
+	}
+}
+
+// Border controls tablewriter borders. It uses simple bools instead of tablewriter's `State`.
+type Border struct {
+	Left, Right, Bottom, Top bool
+}
+
+// withStates converts the bool flags into a tw.Border.
+func (b Border) withStates() tw.Border {
+	return tw.Border{
+		Left:   asState(b.Left),
+		Right:  asState(b.Right),
+		Bottom: asState(b.Bottom),
+		Top:    asState(b.Top),
+	}
+}
+
+// BorderStyle provides the table drawing symbols from three separators.
+// CenterSeparator is returned for every corner and junction.
+type BorderStyle struct {
+	ColumnSeparator string
+	RowSeparator    string
+	CenterSeparator string
+}
+
+// Name returns the fixed style name "html2text".
+func (b BorderStyle) Name() string {
+	return "html2text"
+}
+
+// Center returns CenterSeparator.
+func (b BorderStyle) Center() string {
+	return b.CenterSeparator
+}
+
+// Row returns RowSeparator.
+func (b BorderStyle) Row() string {
+	return b.RowSeparator
+}
+
+// Column returns ColumnSeparator.
+func (b BorderStyle) Column() string {
+	return b.ColumnSeparator
+}
+
+// TopLeft returns CenterSeparator.
+func (b BorderStyle) TopLeft() string {
+	return b.CenterSeparator
+}
+
+// TopMid returns CenterSeparator.
+func (b BorderStyle) TopMid() string {
+	return b.CenterSeparator
+}
+
+// TopRight returns CenterSeparator.
+func (b BorderStyle) TopRight() string {
+	return b.CenterSeparator
+}
+
+// MidLeft returns CenterSeparator.
+func (b BorderStyle) MidLeft() string {
+	return b.CenterSeparator
+}
+
+// MidRight returns CenterSeparator.
+func (b BorderStyle) MidRight() string {
+	return b.CenterSeparator
+}
+
+// BottomLeft returns CenterSeparator.
+func (b BorderStyle) BottomLeft() string {
+	return b.CenterSeparator
+}
+
+// BottomMid returns CenterSeparator.
+func (b BorderStyle) BottomMid() string {
+	return b.CenterSeparator
+}
+
+// BottomRight returns CenterSeparator.
+func (b BorderStyle) BottomRight() string {
+	return b.CenterSeparator
+}
+
+// HeaderLeft returns CenterSeparator.
+func (b BorderStyle) HeaderLeft() string {
+	return b.CenterSeparator
+}
+
+// HeaderMid returns CenterSeparator.
+func (b BorderStyle) HeaderMid() string {
+	return b.CenterSeparator
+}
+
+// HeaderRight returns CenterSeparator.
+func (b BorderStyle) HeaderRight() string {
+	return b.CenterSeparator
+}
+
+// defaultBorderStyle holds the separators used by NewPrettyTablesOptions.
+var defaultBorderStyle = BorderStyle{
+	ColumnSeparator: "|",
+	RowSeparator:    "-",
+	CenterSeparator: "+",
+}
+
+// borderStyle builds a BorderStyle from the separators configured on p.
+func (p *PrettyTablesOptions) borderStyle() BorderStyle {
+	return BorderStyle{
+		ColumnSeparator: p.ColumnSeparator,
+		RowSeparator:    p.RowSeparator,
+		CenterSeparator: p.CenterSeparator,
+	}
+}
+
+// asState converts a bool into tw.On (true) or tw.Off (false).
+func asState(b bool) tw.State {
+	// tablewriter does not provide this conversion itself.
+	if b {
+		return tw.On
+	}
+	return tw.Off
+}