diff --git a/README.md b/README.md index 4115c0f..2adf4c7 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,23 @@ -# jira2markdown +# Overview `jira2markdown` is a text converter from [JIRA markup](https://jira.atlassian.com/secure/WikiRendererHelpAction.jspa?section=all) to [YouTrack Markdown](https://www.jetbrains.com/help/youtrack/standalone/youtrack-markdown-syntax-issues.html) using parsing expression grammars. The Markdown implementation in YouTrack follows the [CommonMark specification](https://spec.commonmark.org/0.29/) with extensions. Thus, `jira2markdown` can be used to convert text to any Markdown syntax with minimal modifications. +# Table of Contents + + * [Prerequisites](#prerequisites) + * [Installation](#installation) + * [Usage](#usage) + * [Conversion tables](#conversion-tables) + * [Headings](#headings) + * [Text Effects](#text-effects) + * [Text Breaks](#text-breaks) + * [Links](#links) + * [Lists](#lists) + * [Images](#images) + * [Tables](#tables) + * [Advanced Formatting](#advanced-formatting) + * [Customization](#customization) + # Prerequisites - Python 3.6+ @@ -296,3 +312,39 @@ Some text with a title + +# Customization + +To customize the list of markup elements, pass it as an optional argument to `convert`: +```python +from jira2markdown import convert +from jira2markdown.elements import MarkupElements +from jira2markdown.markup.links import Link +from jira2markdown.markup.text_effects import Bold + +# Only bold and link tokens will be converted here +elements = MarkupElements([Link, Bold]) +convert("Some Jira text here", elements=elements) +``` + +Keep in mind that the order of markup elements is important! Elements are tried in list order, from top to bottom, and the first one that matches is used. 
+ +To override some elements in the default element list use `insert_after`/`replace` methods: +```python +from jira2markdown import convert +from jira2markdown.elements import MarkupElements +from jira2markdown.markup.base import AbstractMarkup +from jira2markdown.markup.links import Link +from jira2markdown.markup.text_effects import Color + +class CustomColor(Color): + ... + +class MyElement(AbstractMarkup): + ... + +elements = MarkupElements() +elements.replace(Color, CustomColor) +elements.insert_after(Link, MyElement) +convert("Some Jira text here", elements=elements) +``` diff --git a/jira2markdown/elements.py b/jira2markdown/elements.py new file mode 100644 index 0000000..b67aeab --- /dev/null +++ b/jira2markdown/elements.py @@ -0,0 +1,67 @@ +from typing import Iterable, Type + +from pyparsing import Forward, MatchFirst, ParseExpression + +from jira2markdown.markup.advanced import Code, Noformat, Panel +from jira2markdown.markup.base import AbstractMarkup +from jira2markdown.markup.headings import Headings +from jira2markdown.markup.images import Image +from jira2markdown.markup.links import Attachment, Link, MailTo, Mention +from jira2markdown.markup.lists import OrderedList, UnorderedList +from jira2markdown.markup.tables import Table +from jira2markdown.markup.text_breaks import LineBreak, Mdash, Ndash, Ruler +from jira2markdown.markup.text_effects import BlockQuote, Bold, Color, EscSpecialChars, InlineQuote, Monospaced, \ + Quote, Strikethrough, Subscript, Superscript, Underline + + +class MarkupElements(list): + def __init__(self, seq: Iterable = ()): + super().__init__(seq or [ + UnorderedList, + OrderedList, + Code, + Noformat, + Monospaced, + Mention, + MailTo, + Attachment, + Link, + Image, + Table, + Headings, + Quote, + BlockQuote, + Panel, + Bold, + Ndash, + Mdash, + Ruler, + Strikethrough, + Underline, + InlineQuote, + Superscript, + Subscript, + Color, + LineBreak, + EscSpecialChars, + ]) + + def insert_after(self, element: 
Type[AbstractMarkup], new_element: Type[AbstractMarkup]): + index = self.index(element) + self.insert(index + 1, new_element) + + def replace(self, old_element: Type[AbstractMarkup], new_element: Type[AbstractMarkup]): + index = self.index(old_element) + self[index] = new_element + + def expr( + self, + inline_markup: Forward, + markup: Forward, + usernames: dict, + elements: Iterable[Type[AbstractMarkup]], + ) -> ParseExpression: + return MatchFirst([ + element(inline_markup=inline_markup, markup=markup, usernames=usernames).expr + for element in elements + ]) diff --git a/jira2markdown/markup/advanced.py b/jira2markdown/markup/advanced.py index 8232d8a..31c40fb 100644 --- a/jira2markdown/markup/advanced.py +++ b/jira2markdown/markup/advanced.py @@ -1,8 +1,10 @@ -from pyparsing import Combine, FollowedBy, Forward, Group, Literal, OneOrMore, Optional, ParseResults, ParserElement, \ +from pyparsing import Combine, FollowedBy, Group, Literal, OneOrMore, Optional, ParseResults, ParserElement, \ QuotedString, SkipTo, Suppress, Word, alphanums, alphas +from jira2markdown.markup.base import AbstractMarkup -class Noformat: + +class Noformat(AbstractMarkup): def action(self, tokens: ParseResults) -> str: text = tokens[0].strip("\n") return f"```\n{text}\n```" @@ -12,7 +14,7 @@ def expr(self) -> ParserElement: return QuotedString("{noformat}", multiline=True).setParseAction(self.action) -class Code: +class Code(AbstractMarkup): def action(self, tokens: ParseResults) -> str: lang = tokens.lang or "Java" text = tokens.text.strip("\n") @@ -33,18 +35,19 @@ def expr(self) -> ParserElement: ).setParseAction(self.action) -class Panel: - def __init__(self, markup: Forward): - self.markup = markup - +class Panel(AbstractMarkup): def action(self, tokens: ParseResults) -> str: - text = self.markup.transformString(tokens.text.strip()) - for param, value in tokens.get("params", []): if param.lower() == "title": - text = f"**{value}**\n{text}" + prefix = f"> **{value}**\n" + break + 
else: + prefix = "" - return "\n".join([f"> {line.lstrip()}" for line in text.splitlines()]) + text = self.markup.transformString("\n".join([ + line.lstrip() for line in tokens.text.strip().splitlines() + ])) + return prefix + "\n".join([f"> {line}" for line in text.splitlines()]) @property def expr(self) -> ParserElement: diff --git a/jira2markdown/markup/base.py b/jira2markdown/markup/base.py new file mode 100644 index 0000000..9cba763 --- /dev/null +++ b/jira2markdown/markup/base.py @@ -0,0 +1,15 @@ +from pyparsing import Forward, ParserElement + + +class AbstractMarkup: + is_inline_element: bool = True + + def __init__(self, inline_markup: Forward, markup: Forward, usernames: dict): + self.inline_markup = inline_markup + self.markup = markup + self.usernames = usernames + self.init_kwargs = dict(inline_markup=inline_markup, markup=markup, usernames=usernames) + + @property + def expr(self) -> ParserElement: + raise NotImplementedError diff --git a/jira2markdown/markup/headings.py b/jira2markdown/markup/headings.py index b1232f1..4e26c09 100644 --- a/jira2markdown/markup/headings.py +++ b/jira2markdown/markup/headings.py @@ -1,11 +1,18 @@ -from pyparsing import Combine, ParseResults, ParserElement, StringStart, Word +from pyparsing import Combine, LineEnd, ParseResults, ParserElement, SkipTo, StringEnd, StringStart, Word +from jira2markdown.markup.base import AbstractMarkup + + +class Headings(AbstractMarkup): + is_inline_element = False -class Headings: def action(self, tokens: ParseResults) -> str: - return "#" * int(tokens[0][1]) + " " + return "#" * int(tokens.level[1]) + " " + self.inline_markup.transformString(tokens.text) @property def expr(self) -> ParserElement: - return ("\n" | StringStart()) \ - + Combine(Word("h", "123456", exact=2) + ". ").setParseAction(self.action) + return ("\n" | StringStart()) + Combine( + Word("h", "123456", exact=2).setResultsName("level") + + ". 
" + + SkipTo(LineEnd() | StringEnd()).setResultsName("text"), + ).setParseAction(self.action) diff --git a/jira2markdown/markup/images.py b/jira2markdown/markup/images.py index e155a87..11dfef9 100644 --- a/jira2markdown/markup/images.py +++ b/jira2markdown/markup/images.py @@ -3,8 +3,10 @@ from pyparsing import Combine, Optional, ParseResults, ParserElement, PrecededBy, Regex, SkipTo, StringStart, Word, \ printables +from jira2markdown.markup.base import AbstractMarkup -class Image: + +class Image(AbstractMarkup): def action(self, tokens: ParseResults) -> str: return f"![{tokens.url}]({tokens.url})" diff --git a/jira2markdown/markup/links.py b/jira2markdown/markup/links.py index 2b78ce6..8819fa2 100644 --- a/jira2markdown/markup/links.py +++ b/jira2markdown/markup/links.py @@ -1,48 +1,59 @@ from string import punctuation -from pyparsing import CaselessLiteral, Char, Combine, FollowedBy, Forward, Optional, ParseResults, ParserElement, \ +from pyparsing import CaselessLiteral, Char, Combine, FollowedBy, Optional, ParseResults, ParserElement, \ PrecededBy, SkipTo, StringEnd, StringStart, Suppress, White, Word, alphanums +from jira2markdown.markup.base import AbstractMarkup -class MailTo: + +class MailTo(AbstractMarkup): def action(self, tokens: ParseResults) -> str: - return f"<{tokens.email}>" + alias = self.markup.transformString(getattr(tokens, "alias", "")) + email = tokens.email + + if (alias == email) or (len(alias.strip()) == 0): + return f"<{email}>" + else: + return f"[{alias}](mailto:{tokens.email})" @property def expr(self) -> ParserElement: return Combine( "[" - + Optional( - SkipTo("|", failOn="]") + Suppress("|"), - ) + + Optional(SkipTo("|", failOn="]").setResultsName("alias") + "|") + "mailto:" - + Word(alphanums + "@.-").setResultsName("email") + + Word(alphanums + "@.-_").setResultsName("email") + "]", ).setParseAction(self.action) -class Link: - def __init__(self, markup: Forward): - self.markup = markup +class Link(AbstractMarkup): + URL_PREFIXES 
= ["http", "ftp"] def action(self, tokens: ParseResults) -> str: - alias = getattr(tokens, "alias", "") + alias = self.markup.transformString(getattr(tokens, "alias", "")) url = tokens.url - if len(alias) > 0: - alias = self.markup.transformString(alias) - return f"[{alias}]({url})" - else: - return f"<{url}>" + if url.lower().startswith("www."): + url = f"https://{url}" + + if not any(map(url.lower().startswith, self.URL_PREFIXES)): + url = self.markup.transformString(url) + return fr"[{alias}\|{url}]" if alias else f"[{url}]" + + return f"[{alias}]({url})" if len(alias) > 0 else f"<{url}>" @property def expr(self) -> ParserElement: - ALIAS_LINK = SkipTo("|", failOn="]").setResultsName("alias") + "|" + SkipTo("]").setResultsName("url") - LINK = Combine("http" + SkipTo("]")).setResultsName("url") - return Combine("[" + (LINK ^ ALIAS_LINK) + "]").setParseAction(self.action) + return Combine( + "[" + + Optional(SkipTo("|", failOn="]").setResultsName("alias") + "|") + + SkipTo("]").setResultsName("url") + + "]", + ).setParseAction(self.action) -class Attachment: +class Attachment(AbstractMarkup): def action(self, tokens: ParseResults) -> str: return f"[{tokens.filename}]({tokens.filename})" @@ -51,10 +62,7 @@ def expr(self) -> ParserElement: return Combine("[^" + SkipTo("]").setResultsName("filename") + "]").setParseAction(self.action) -class Mention: - def __init__(self, usernames: dict): - self.usernames = usernames - +class Mention(AbstractMarkup): def action(self, tokens: ParseResults) -> str: username = self.usernames.get(tokens.accountid) return f"@{tokens.accountid}" if username is None else f"@{username}" diff --git a/jira2markdown/markup/lists.py b/jira2markdown/markup/lists.py index 84eb716..042d86c 100644 --- a/jira2markdown/markup/lists.py +++ b/jira2markdown/markup/lists.py @@ -1,29 +1,111 @@ -from pyparsing import Combine, LineStart, OneOrMore, Optional, ParseResults, ParserElement, Suppress, White +from pyparsing import Char, Combine, LineEnd, 
LineStart, Literal, MatchFirst, OneOrMore, Optional, ParseResults, \ + ParserElement, SkipTo, StringEnd, White +from jira2markdown.markup.advanced import Panel +from jira2markdown.markup.base import AbstractMarkup +from jira2markdown.markup.text_effects import BlockQuote, Color + + +class ListIndentState: + def __init__(self): + self.indent = 0 + + def reset(self): + self.indent = 0 + + +class ListIndent(ParserElement): + def __init__(self, indent_state: ListIndentState, tokens: str, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.name = "ListIndent" + self.indent_state = indent_state + self.tokens = tokens + + def parseImpl(self, instring, loc, doActions=True): + exprs = [] + for token in self.tokens: + for indent in range(self.indent_state.indent + 1, max(0, self.indent_state.indent - 2), -1): + exprs.append(Literal(token * indent + " ")) + + loc, result = MatchFirst(exprs).parseImpl(instring, loc, doActions) + self.indent_state.indent = len(result[0]) - 1 + return loc, result + + +class List(AbstractMarkup): + is_inline_element = False + + def __init__(self, nested_token: str, nested_indent: int, tokens: str, indent: int, bullet: str, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.nested_token = nested_token + self.nested_indent = nested_indent + self.tokens = tokens + self.indent = indent + self.bullet = bullet + + self.indent_state = ListIndentState() -class UnorderedList: def action(self, tokens: ParseResults) -> str: - count = len(tokens.nested) * 3 + len(tokens.chars) * 2 - 2 - return " " * count + "- " + result = [] + + for line in tokens: + bullets, text = line.split(" ", maxsplit=1) + + nested_indent = 0 + while bullets[0] == self.nested_token: + nested_indent += 1 + bullets = bullets[1:] + + count = nested_indent * self.nested_indent + len(bullets) * self.indent + + line_padding = " " * count + item_padding = " " * (count - self.indent) + self.bullet + " " + text = self.markup.transformString(text).splitlines() or [""] + + 
result.append( + "\n".join([ + item_padding + line if i == 0 else line_padding + line + for i, line in enumerate(text) + ]), + ) + + self.indent_state.reset() + text_end = "\n" if (tokens[-1][-1] == "\n") else "" + return "\n".join(result) + text_end @property def expr(self) -> ParserElement: - return LineStart() + Combine( - Optional("#", default="").setResultsName("nested") - + (OneOrMore("*") | OneOrMore("-")).setResultsName("chars") - + Suppress(" "), - ).setParseAction(self.action) + ~White() + NL = LineEnd() + LIST_BREAK = NL + Optional(White(" \t")) + NL | StringEnd() + IGNORE = BlockQuote(**self.init_kwargs).expr | Panel(**self.init_kwargs).expr | Color(**self.init_kwargs).expr + ROW = LineStart() + Combine( + Optional(self.nested_token, default="") + + ListIndent(self.indent_state, self.tokens) + + SkipTo(NL + Char(self.nested_token + self.tokens) | LIST_BREAK, ignore=IGNORE) + + Optional(NL), + ) + return OneOrMore(ROW, stopOn=LIST_BREAK).setParseAction(self.action) + + +class UnorderedList(List): + def __init__(self, *args, **kwargs): + super().__init__(nested_token="#", nested_indent=3, tokens="*-", indent=2, bullet="-", *args, **kwargs) -class OrderedList: def action(self, tokens: ParseResults) -> str: - count = len(tokens.nested) * 2 + len(tokens.chars) * 3 - 3 - return " " * count + "1. " + result = super().action(tokens) + first_line = (result.splitlines() or [""])[0].strip() - @property - def expr(self) -> ParserElement: - return LineStart() + Combine( - Optional("*", default="").setResultsName("nested") - + OneOrMore("#").setResultsName("chars") - + Suppress(" "), - ).setParseAction(self.action) + ~White() + # Text with dashed below it turns into a heading. To prevent this + # add a line break before an empty list. 
+ if first_line == "-": + return "\n" + result + else: + return result + + +class OrderedList(List): + def __init__(self, *args, **kwargs): + super().__init__(nested_token="*", nested_indent=2, tokens="#", indent=3, bullet="1.", *args, **kwargs) diff --git a/jira2markdown/markup/tables.py b/jira2markdown/markup/tables.py index 8199086..a3d3e8e 100644 --- a/jira2markdown/markup/tables.py +++ b/jira2markdown/markup/tables.py @@ -1,13 +1,13 @@ -from pyparsing import Combine, Forward, Group, LineEnd, LineStart, Literal, OneOrMore, Optional, ParseResults, \ +from pyparsing import Combine, Group, LineEnd, LineStart, Literal, OneOrMore, Optional, ParseResults, \ ParserElement, SkipTo, StringEnd, StringStart, White, ZeroOrMore +from jira2markdown.markup.base import AbstractMarkup from jira2markdown.markup.images import Image from jira2markdown.markup.links import Link, MailTo, Mention -class Table: - def __init__(self, markup: Forward): - self.markup = markup +class Table(AbstractMarkup): + is_inline_element = False def action(self, tokens: ParseResults) -> str: lines = [line for line in tokens if len(line) > 0] @@ -38,7 +38,8 @@ def expr(self) -> ParserElement: NL = LineEnd().suppress() SEP = (Literal("||") | Literal("|")).suppress() ROW_BREAK = NL + SEP | NL + NL | StringEnd() - IGNORE = Link(self.markup).expr | MailTo().expr | Image().expr | Mention({}).expr + IGNORE = Link(**self.init_kwargs).expr | MailTo(**self.init_kwargs).expr \ + | Image(**self.init_kwargs).expr | Mention(**self.init_kwargs).expr ROW = SEP + ZeroOrMore( SkipTo(SEP | ROW_BREAK, ignore=IGNORE) + Optional(SEP), diff --git a/jira2markdown/markup/text_breaks.py b/jira2markdown/markup/text_breaks.py index dfc1fff..7508334 100644 --- a/jira2markdown/markup/text_breaks.py +++ b/jira2markdown/markup/text_breaks.py @@ -1,13 +1,15 @@ from pyparsing import Keyword, LineEnd, ParserElement, StringStart, WordEnd, WordStart, replaceWith +from jira2markdown.markup.base import AbstractMarkup -class LineBreak: + 
+class LineBreak(AbstractMarkup): @property def expr(self) -> ParserElement: return Keyword("\\\\", identChars="\\").setParseAction(replaceWith("\n")) -class Ndash: +class Ndash(AbstractMarkup): @property def expr(self) -> ParserElement: return WordStart() \ @@ -15,7 +17,7 @@ def expr(self) -> ParserElement: + WordEnd() -class Mdash: +class Mdash(AbstractMarkup): @property def expr(self) -> ParserElement: return WordStart() \ @@ -23,11 +25,13 @@ def expr(self) -> ParserElement: + WordEnd() -class Ruler: +class Ruler(AbstractMarkup): + is_inline_element = False + @property def expr(self) -> ParserElement: # Text with dashed below it turns into a heading. To prevent this # add a line break before the dashes. - return ("\n" | StringStart() | LineBreak().expr) \ + return ("\n" | StringStart() | LineBreak(**self.init_kwargs).expr) \ + Keyword("----", identChars="-").setParseAction(replaceWith("\n----")) \ + LineEnd() diff --git a/jira2markdown/markup/text_effects.py b/jira2markdown/markup/text_effects.py index 525d9f2..17667cf 100644 --- a/jira2markdown/markup/text_effects.py +++ b/jira2markdown/markup/text_effects.py @@ -1,133 +1,92 @@ import re -from pyparsing import CaselessLiteral, Char, Combine, Forward, LineEnd, Literal, Optional, ParseResults, \ - ParserElement, PrecededBy, QuotedString, Regex, SkipTo, StringStart, Suppress, White, Word, WordEnd, WordStart, \ - alphanums, alphas, hexnums, nums, replaceWith +from pyparsing import CaselessLiteral, Char, Combine, FollowedBy, Literal, Optional, ParseResults, ParserElement, \ + PrecededBy, QuotedString, Regex, SkipTo, StringEnd, StringStart, Suppress, White, Word, alphas, hexnums, nums, \ + replaceWith -from jira2markdown.markup.links import Attachment, Mention +from jira2markdown.markup.base import AbstractMarkup +from jira2markdown.markup.images import Image +from jira2markdown.markup.links import Attachment, Link, Mention -class Bold: - def __init__(self, markup: Forward): - self.markup = markup +class 
QuotedElement(AbstractMarkup): + TOKEN = "" + QUOTE_CHAR = "" + END_QUOTE_CHAR = "" def action(self, tokens: ParseResults) -> str: - return "**" + self.markup.transformString(tokens[0]) + "**" - - @property - def expr(self) -> ParserElement: - TOKEN = Suppress("*") - IGNORE = White() + TOKEN | Color(self.markup).expr - return (StringStart() | PrecededBy(Regex(r"\W", flags=re.UNICODE), retreat=1)) + Combine( - TOKEN - + (~White() & ~TOKEN) - + SkipTo(TOKEN, ignore=IGNORE, failOn=LineEnd()) - + TOKEN - + ~Char(alphanums), - ).setParseAction(self.action) + return self.QUOTE_CHAR \ + + self.inline_markup.transformString(tokens[0]) \ + + (self.END_QUOTE_CHAR or self.QUOTE_CHAR) - -class Strikethrough: - def __init__(self, markup: Forward): - self.markup = markup - - def action(self, tokens: ParseResults) -> str: - return "~~" + self.markup.transformString(tokens[0]) + "~~" + def get_ignore_expr(self) -> ParserElement: + return Color(**self.init_kwargs).expr @property def expr(self) -> ParserElement: - TOKEN = Suppress("-") - IGNORE = White() + TOKEN | Color(self.markup).expr - return WordStart() + Combine( + NON_ALPHANUMS = Regex(r"\W", flags=re.UNICODE) + TOKEN = Suppress(self.TOKEN) + IGNORE = White() + TOKEN | self.get_ignore_expr() + ELEMENT = Combine( TOKEN - + ~White() + + (~White() & ~Char(self.TOKEN)) + SkipTo(TOKEN, ignore=IGNORE, failOn="\n") - + TOKEN, - ).setParseAction(self.action) + WordEnd() + + TOKEN + + FollowedBy(NON_ALPHANUMS | StringEnd()), + ) + return (StringStart() | PrecededBy(NON_ALPHANUMS, retreat=1)) \ + + Combine(ELEMENT.setParseAction(self.action) + Optional(~ELEMENT, default=" ")) -class Underline: - def __init__(self, markup: Forward): - self.markup = markup - def action(self, tokens: ParseResults) -> str: - return self.markup.transformString(tokens[0]) +class Bold(QuotedElement): + TOKEN = "*" + QUOTE_CHAR = "**" - @property - def expr(self) -> ParserElement: - TOKEN = Suppress("+") - IGNORE = White() + TOKEN | Color(self.markup).expr - 
return WordStart() + Combine( - TOKEN - + ~White() - + SkipTo(TOKEN, ignore=IGNORE, failOn="\n") - + TOKEN, - ).setParseAction(self.action) + WordEnd() +class Strikethrough(QuotedElement): + TOKEN = "-" + QUOTE_CHAR = "~~" -class InlineQuote: - def __init__(self, markup: Forward): - self.markup = markup + def get_ignore_expr(self) -> ParserElement: + return Color(**self.init_kwargs).expr \ + | Attachment(**self.init_kwargs).expr \ + | Mention(**self.init_kwargs).expr \ + | Link(**self.init_kwargs).expr \ + | Image(**self.init_kwargs).expr - def action(self, tokens: ParseResults) -> str: - return "" + self.markup.transformString(tokens[0]) + "" - - @property - def expr(self) -> ParserElement: - TOKEN = Suppress("??") - IGNORE = White() + TOKEN | Color(self.markup).expr - return WordStart() + Combine( - TOKEN - + ~White() - + SkipTo(TOKEN, ignore=IGNORE, failOn="\n") - + TOKEN, - ).setParseAction(self.action) + WordEnd() +class Underline(QuotedElement): + TOKEN = "+" -class Superscript: - def __init__(self, markup: Forward): - self.markup = markup - def action(self, tokens: ParseResults) -> str: - return "" + self.markup.transformString(tokens[0]) + "" +class InlineQuote(QuotedElement): + TOKEN = "??" 
+ QUOTE_CHAR = "" + END_QUOTE_CHAR = "" - @property - def expr(self) -> ParserElement: - TOKEN = Suppress("^") - IGNORE = White() + TOKEN | Color(self.markup).expr | Attachment().expr - return WordStart() + Combine( - TOKEN - + ~White() - + SkipTo(TOKEN, ignore=IGNORE, failOn="\n") - + TOKEN, - ).setParseAction(self.action) + WordEnd() +class Superscript(QuotedElement): + TOKEN = "^" + QUOTE_CHAR = "" + END_QUOTE_CHAR = "" -class Subscript: - def __init__(self, markup: Forward): - self.markup = markup + def get_ignore_expr(self) -> ParserElement: + return Color(**self.init_kwargs).expr | Attachment(**self.init_kwargs).expr - def action(self, tokens: ParseResults) -> str: - return "" + self.markup.transformString(tokens[0]) + "" - @property - def expr(self) -> ParserElement: - TOKEN = Suppress("~") - IGNORE = White() + TOKEN | Color(self.markup).expr | Mention({}).expr - return WordStart() + Combine( - TOKEN - + ~White() - + SkipTo(TOKEN, ignore=IGNORE, failOn="\n") - + TOKEN, - ).setParseAction(self.action) + WordEnd() +class Subscript(QuotedElement): + TOKEN = "~" + QUOTE_CHAR = "" + END_QUOTE_CHAR = "" + def get_ignore_expr(self) -> ParserElement: + return Color(**self.init_kwargs).expr | Mention(**self.init_kwargs).expr -class Color: - def __init__(self, markup: Forward): - self.markup = markup +class Color(AbstractMarkup): def action(self, tokens: ParseResults) -> str: - text = self.markup.transformString(tokens.text) + text = self.inline_markup.transformString(tokens.text) if tokens.red and tokens.green and tokens.blue: color = f"#{int(tokens.red):x}{int(tokens.green):x}{int(tokens.blue):x}" @@ -160,26 +119,27 @@ def expr(self) -> ParserElement: return expr.setParseAction(self.action) -class Quote: +class Quote(AbstractMarkup): + is_inline_element = False + @property def expr(self) -> ParserElement: return ("\n" | StringStart()) + Literal("bq. 
").setParseAction(replaceWith("> ")) -class BlockQuote: - def __init__(self, markup: Forward): - self.markup = markup - +class BlockQuote(AbstractMarkup): def action(self, tokens: ParseResults) -> str: - text = self.markup.transformString(tokens[0].strip()) - return "\n".join([f"> {line.lstrip()}" for line in text.splitlines()]) + text = self.markup.transformString("\n".join([ + line.lstrip() for line in tokens[0].strip().splitlines() + ])) + return "\n".join([f"> {line}" for line in text.splitlines()]) @property def expr(self) -> ParserElement: return QuotedString("{quote}", multiline=True).setParseAction(self.action) -class Monospaced: +class Monospaced(AbstractMarkup): def action(self, tokens: ParseResults) -> str: return f"`{tokens[0]}`" @@ -188,7 +148,7 @@ def expr(self) -> ParserElement: return QuotedString("{{", endQuoteChar="}}").setParseAction(self.action) -class EscSpecialChars: +class EscSpecialChars(AbstractMarkup): """ Escapes '*' characters that are not a part of any expression grammar """ diff --git a/jira2markdown/parser.py b/jira2markdown/parser.py index f106c17..08bc51f 100644 --- a/jira2markdown/parser.py +++ b/jira2markdown/parser.py @@ -2,49 +2,19 @@ from pyparsing import Forward, ParserElement -from jira2markdown.markup.advanced import Code, Noformat, Panel -from jira2markdown.markup.headings import Headings -from jira2markdown.markup.images import Image -from jira2markdown.markup.links import Attachment, Link, MailTo, Mention -from jira2markdown.markup.lists import OrderedList, UnorderedList -from jira2markdown.markup.tables import Table -from jira2markdown.markup.text_breaks import LineBreak, Mdash, Ndash, Ruler -from jira2markdown.markup.text_effects import BlockQuote, Bold, Color, EscSpecialChars, InlineQuote, Monospaced, \ - Quote, Strikethrough, Subscript, Superscript, Underline +from jira2markdown.elements import MarkupElements ParserElement.setDefaultWhitespaceChars(" \t") -def convert(text: str, usernames: Optional[dict] = None) -> 
str: +def convert(text: str, usernames: Optional[dict] = None, elements: Optional[MarkupElements] = None) -> str: usernames = usernames or {} + elements = elements or MarkupElements() + + inline_markup = Forward() markup = Forward() - markup <<= UnorderedList().expr | \ - OrderedList().expr | \ - Code().expr | \ - Noformat().expr | \ - Monospaced().expr | \ - Mention(usernames).expr | \ - MailTo().expr | \ - Attachment().expr | \ - Link(markup).expr | \ - Image().expr | \ - Table(markup).expr | \ - Headings().expr | \ - Quote().expr | \ - BlockQuote(markup).expr | \ - Panel(markup).expr | \ - Bold(markup).expr | \ - Ndash().expr | \ - Mdash().expr | \ - Ruler().expr | \ - Strikethrough(markup).expr | \ - Underline(markup).expr | \ - InlineQuote(markup).expr | \ - Superscript(markup).expr | \ - Subscript(markup).expr | \ - Color(markup).expr | \ - LineBreak().expr | \ - EscSpecialChars().expr + inline_markup << elements.expr(inline_markup, markup, usernames, filter(lambda e: e.is_inline_element, elements)) + markup << elements.expr(inline_markup, markup, usernames, elements) return markup.transformString(text) diff --git a/pyproject.toml b/pyproject.toml index ce740a8..3f4a610 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "jira2markdown" -version = "0.1.8" +version = "0.2.0" description = "Convert text from JIRA markup to Markdown using parsing expression grammars" authors = ["Evgeniy Krysanov "] readme = "README.md" diff --git a/setup.cfg b/setup.cfg index 8844a08..7a15803 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,9 +2,11 @@ statistics = True max-line-length = 120 -# W291 trailing whitespace -# W503 line break before binary operator -ignore = W291, W503 +exclude = + .git, + __pycache__, + .pytest_cache, + tests # flake8-coding no-accept-encodings = True diff --git a/tests/markup/test_advanced.py b/tests/markup/test_advanced.py index 1c6bd78..c1c39c2 100644 --- a/tests/markup/test_advanced.py +++ 
b/tests/markup/test_advanced.py @@ -67,10 +67,12 @@ class TestPanel: def test_basic_conversion(self): assert convert(""" {panel} -Some text + Some text + more line {panel} """) == """ > Some text +> more line """ def test_title(self): diff --git a/tests/markup/test_elements.py b/tests/markup/test_elements.py new file mode 100644 index 0000000..706c94c --- /dev/null +++ b/tests/markup/test_elements.py @@ -0,0 +1,27 @@ +from pyparsing import ParserElement + +from jira2markdown.elements import MarkupElements + + +class ElementA(ParserElement): + pass + + +class ElementB(ParserElement): + pass + + +class ElementC(ParserElement): + pass + + +class TestMarkupElements: + def test_insert_after(self): + elements = MarkupElements([ElementA, ElementB]) + elements.insert_after(ElementA, ElementC) + assert list(elements) == [ElementA, ElementC, ElementB] + + def test_replace(self): + elements = MarkupElements([ElementA, ElementB]) + elements.replace(ElementB, ElementC) + assert list(elements) == [ElementA, ElementC] diff --git a/tests/markup/test_links.py b/tests/markup/test_links.py index 316780d..1b7b16d 100644 --- a/tests/markup/test_links.py +++ b/tests/markup/test_links.py @@ -3,21 +3,26 @@ class TestMailTo: def test_basic_conversion(self): - assert convert("[mailto:service@atlassian.com]") == "" + assert convert("[mailto:box@example.com]") == "" def test_alias(self): - assert convert("[Some text|mailto:service@atlassian.com]") == "" + assert convert("[box@example.com|mailto:box@example.com]") == "" + assert convert("[Some text|mailto:home_box@domain-name.com]") == "[Some text](mailto:home_box@domain-name.com)" class TestLink: def test_basic_conversion(self): - assert convert("[http://jira.atlassian.com]") == "" + assert convert("[http://example.com]") == "" + assert convert("[ftp://example.com]") == "" + assert convert("[WWW.EXAMPLE.COM]") == "" def test_alias(self): - assert convert("[Atlassian|http://atlassian.com]") == "[Atlassian](http://atlassian.com)" + assert 
convert("[Example text|http://example.com]") == "[Example text](http://example.com)" - def test_exceptions(self): + def test_text(self): assert convert("[Text in square brackets]") == "[Text in square brackets]" + assert convert("[Some text|]") == r"[Some text\|]" + assert convert("[Some text|More text]") == r"[Some text\|More text]" class TestAttachment: diff --git a/tests/markup/test_lists.py b/tests/markup/test_lists.py index 922887c..5f4b01f 100644 --- a/tests/markup/test_lists.py +++ b/tests/markup/test_lists.py @@ -46,19 +46,84 @@ def test_match_start_conditions(self): assert convert("\n* Item") == "\n- Item" assert convert(" * Item") == r" \* Item" + def test_multiline(self): + assert convert(""" +* Item +Line +Next + +Break +""") == """ +- Item + Line + Next + +Break +""" + + def test_start_indent(self): + assert convert(""" +* First +** Second + +** One +*** Two + +* Last +** One +""") == r""" +- First + - Second + +\*\* One +\*\*\* Two + +- Last + - One +""" + assert convert(""" +-- One +--- Two +""") == """ +– One +— Two +""" + + def test_indent_step(self): + assert convert(""" +* one +** two +**** four +""") == r""" +- one + - two +\*\*\*\* four +""" + def test_empty_list(self): assert convert(""" * ** - -- - """) == r""" -\* -\*\* + """) == """ + +- + - - -– + - """ + def test_text_indent(self): + assert convert(""" +Some text +* +""") == """ +Some text + +- +""" + class TestOrderedList: def test_bullets(self): @@ -86,3 +151,59 @@ def test_mixed_bullets(self): 1. numbered 1. list """ + + def test_multiline(self): + assert convert(""" +# Item +Line +Next + +Break +""") == """ +1. Item + Line + Next + +Break +""" + + def test_start_indent(self): + assert convert(""" +# First +## Second + +## One +### Two + +# Last +## One +""") == """ +1. First + 1. Second + +## One +### Two + +1. Last + 1. One +""" + + def test_indent_step(self): + assert convert(""" +# one +## two +#### four +""") == r""" +1. one + 1. 
two +#### four +""" + + def test_empty_list(self): + assert convert(""" +# +## + """) == r""" +1. + 1. + """ diff --git a/tests/markup/test_mixed_content.py b/tests/markup/test_mixed_content.py index d0516c9..bd4b01d 100644 --- a/tests/markup/test_mixed_content.py +++ b/tests/markup/test_mixed_content.py @@ -1,3 +1,5 @@ +import pytest + from jira2markdown import convert @@ -6,6 +8,19 @@ def test_ruler(self): assert convert(r"Text\\ ---- ") == "Text\n\n----" +class TestBlockQuoteContent: + def test_list(self): + assert convert(""" +{quote} +* Item +** Line +{quote} +""") == """ +> - Item +> - Line +""" + + class TestRecursiveContent: def test_bold_color(self): assert convert("*text {color:red}*text inside*{color} outside*") == \ @@ -51,6 +66,153 @@ def test_subscript_mention(self): assert convert("~text [~username] outside~") == "text @username outside" +@pytest.mark.parametrize("token,test_input,expected", [ + ("headings", "h2. %s", "## %s"), + ("bold", "*%s*", "**%s**"), + ("strikethrough", "-%s-", "~~%s~~"), + ("underline", "+%s+", "%s"), + ("inlinequote", "??%s??", "%s"), + ("superscript", "^%s^", "%s"), + ("subscript", "~%s~", "%s"), + ("color", "{color:red}%s{color}", '%s'), + ("quote", "bq. %s", "> %s"), +], ids=["headings", "bold", "strikethrough", "underline", "inlinequote", "superscript", "subscript", "color", "quote"]) +class TestInlineElements: + def test_headings(self, token, test_input, expected): + assert convert(test_input % "h2. Heading") == expected % "h2. Heading" + + def test_limited_markup(self, token, test_input, expected): + if token == "subscript": + pytest.skip(f"Skip nested tests for {token} token") + else: + assert convert(test_input % "h2. ~Heading~") == expected % "h2. Heading" + + def test_quote(self, token, test_input, expected): + assert convert(test_input % "bq. Quote") == expected % "bq. 
Quote" + + def test_table(self, token, test_input, expected): + assert convert(test_input % "|Table") == expected % "|Table" + + def test_list(self, token, test_input, expected): + if token == "bold": + pytest.skip(f"Skip nested tests for {token} token") + else: + assert convert(test_input % "* Item") == expected % r"\* Item" + + def test_ruler(self, token, test_input, expected): + if token == "strikethrough": + pytest.skip(f"Skip nested tests for {token} token") + else: + assert convert(test_input % "----") == expected % "----" + + def test_bold(self, token, test_input, expected): + if token == "bold": + pytest.skip(f"Skip nested tests for {token} token") + else: + assert convert(test_input % "*Bold text*") == expected % "**Bold text**" + + def test_color(self, token, test_input, expected): + if token == "color": + pytest.skip(f"Skip nested tests for {token} token") + else: + assert convert(test_input % "{color:red}Red text{color}") == expected % 'Red text' + + def test_blockquote(self, token, test_input, expected): + assert convert(test_input % "{quote}Quote text{quote}") == expected % "> Quote text" + + def test_monospaced(self, token, test_input, expected): + assert convert(test_input % "{{monospaced}}") == expected % "`monospaced`" + + def test_image(self, token, test_input, expected): + assert convert(test_input % "!attached-image.gif!") == expected % "![attached-image.gif](attached-image.gif)" + + def test_link(self, token, test_input, expected): + assert convert(test_input % "[http://example.com]") == expected % "" + + def test_mention(self, token, test_input, expected): + assert convert(test_input % "[~username]") == expected % "@username" + + +@pytest.mark.parametrize("token,test_input,expected", [ + ("blockquote", "{quote}%s{quote}", ["> %s", "> %s"]), + ("panel", "{panel}%s{panel}", ["> %s", "> %s"]), + ("table", "|%s\n|row", ["|%s|\n|-|\n|row|\n"]), + ("unordered_list", "* %s", ["- %s", " %s"]), + ("ordered_list", "# %s", ["1. 
%s", " %s"]), +], ids=["blockquote", "panel", "table", "unordered_list", "ordered_list"]) +class TestBlockElements: + def render_expected(self, expected, text): + if len(expected) == 1: + return expected[0] % text + + first_line, next_line = expected + return "\n".join([ + first_line % line if i == 0 else next_line % line + for i, line in enumerate(text.splitlines()) + ]) + + def test_headings(self, token, test_input, expected): + assert convert(test_input % "h2. Heading") == self.render_expected(expected, "## Heading") + + def test_quote(self, token, test_input, expected): + assert convert(test_input % "bq. Quote") == self.render_expected(expected, "> Quote") + + def test_table(self, token, test_input, expected): + if token == "table": + pytest.skip(f"Skip nested tests for {token} token") + else: + assert convert(test_input % "|Table") == self.render_expected(expected, "|Table|\n|-|\n") + + def test_list(self, token, test_input, expected): + if token in ["unordered_list", "ordered_list"]: + pytest.skip(f"Skip nested tests for {token} token") + else: + assert convert(test_input % "* Item") == self.render_expected(expected, "- Item") + + def test_bold(self, token, test_input, expected): + assert convert(test_input % "*Bold text*") == self.render_expected(expected, "**Bold text**") + + def test_color(self, token, test_input, expected): + assert convert(test_input % "{color:red}Red text{color}") == \ + self.render_expected(expected, 'Red text') + + def test_blockquote(self, token, test_input, expected): + if token == "blockquote": + pytest.skip(f"Skip nested tests for {token} token") + else: + assert convert(test_input % "{quote}Quote text{quote}") == self.render_expected(expected, "> Quote text") + + def test_monospaced(self, token, test_input, expected): + assert convert(test_input % "{{monospaced}}") == self.render_expected(expected, "`monospaced`") + + def test_image(self, token, test_input, expected): + assert convert(test_input % "!attached-image.gif!") == \ + 
self.render_expected(expected, "![attached-image.gif](attached-image.gif)") + + def test_link(self, token, test_input, expected): + assert convert(test_input % "[http://example.com]") == self.render_expected(expected, "") + + def test_mention(self, token, test_input, expected): + assert convert(test_input % "[~username]") == self.render_expected(expected, "@username") + + +class TestStrikethroughContent: + def test_color(self): + assert convert("-{color:red}-text-{color}-") == '~~~~text~~~~' + + def test_attachment(self): + assert convert("-[^file-name.ext]-") == "~~[file-name.ext](file-name.ext)~~" + + def test_mention(self): + assert convert("-[~user-name]-") == "~~@user-name~~" + + def test_link(self): + assert convert("-[http://site-name.tld]-") == "~~~~" + + def test_image(self): + assert convert("-!attached-image.gif!-") == "~~![attached-image.gif](attached-image.gif)~~" + + class TestTableContent: def test_basic_markup(self): assert convert("| Table *bold header* and {color:red}colored title{color} |") == \ @@ -64,7 +226,7 @@ def test_cell_link(self): def test_cell_mailto(self): assert convert("|[mailto:user@example.com]|") == "||\n|-|\n" - assert convert("|[alias|mailto:user@example.com]|") == "||\n|-|\n" + assert convert("|[-alias-|mailto:user@example.com]|") == "|[~~alias~~](mailto:user@example.com)|\n|-|\n" def test_cell_mention(self): assert convert("|[user|~uuid]|", {"uuid": "elliot"}) == "|@elliot|\n|-|\n" @@ -82,3 +244,156 @@ def test_text_formatting(self): > a block of text surrounded with a **panel** > line with ![image.png](image.png) """ + + def test_list(self): + assert convert(""" +{panel} +* Item +** Line +{panel} +""") == """ +> - Item +> - Line +""" + + +class TestListContent: + def test_broken_list_markup(self): + assert convert(""" +----- Hello, -World-! ----- +""") == """ +----- Hello, ~~World~~! 
----- +""" + assert convert(""" +-- +Hello +{quote} +World +{quote} +""") == """ +– +Hello +> World +""" + assert convert(""" +--------- + +-- +""") == """ +--------- + +– +""" + + def test_list_blockquote(self): + assert convert(""" +* Item +** Second +*** {quote} + + +Some quote +{quote} +**** Four +""") == """ +- Item + - Second + - > Some quote + - Four +""" + assert convert(""" +# Item +## Second +### {quote} + + +Some quote +{quote} +#### Four +""") == """ +1. Item + 1. Second + 1. > Some quote + 1. Four +""" + + def test_list_panel_list(self): + assert convert(""" +* Item +** Second +*** {panel} + + +Some quote +{panel} +**** Four +""") == """ +- Item + - Second + - > Some quote + - Four +""" + assert convert(""" +# Item +## Second +### {panel} + + +Some quote +{panel} +#### Four +""") == """ +1. Item + 1. Second + 1. > Some quote + 1. Four +""" + + def test_list_color_list(self): + assert convert(""" +* Item +** Second +*** {color:red} + + +Some text +{color} +**** Four +""") == """ +- Item + - Second + - + + + Some text + + - Four +""" + assert convert(""" +# Item +## Second +### {color:red} + + +Some text +{color} +#### Four +""") == """ +1. Item + 1. Second + 1. + + + Some text + + 1. 
Four +""" + + +class TestLink: + def test_alias_markup(self): + assert convert("[+box@example.com+|mailto:box@example.com]") == "" + + def test_text_markup(self): + assert convert("[Text in -square- brackets]") == "[Text in ~~square~~ brackets]" + assert convert("[Some *text*|More ^text^]") == r"[Some **text**\|More text]" diff --git a/tests/markup/test_text_effects.py b/tests/markup/test_text_effects.py index 661f834..6258835 100644 --- a/tests/markup/test_text_effects.py +++ b/tests/markup/test_text_effects.py @@ -26,9 +26,11 @@ def test_multiline(self): def test_single_token(self): assert convert("single *char") == r"single \*char" - def test_multi_surrounding_tokens(self): - assert convert("*bold**") == r"**bold**\*" - assert convert("**bold**") == r"\***bold**\*" + def test_adjacent_tokens(self): + assert convert("*some**text*") == "**some** **text**" + assert convert("*some* *text*") == "**some** **text**" + assert convert("**text**") == r"\***text**\*" + assert convert("**some****text**") == r"\***some**\*\***text**\*" def test_empty_text(self): assert convert("**") == r"\*\*" @@ -55,6 +57,12 @@ def test_match_end_conditions(self): def test_multiline(self): assert convert("-multiline\nstrikethrough-") == "-multiline\nstrikethrough-" + def test_adjacent_tokens(self): + assert convert("-some--text-") == "~~some~~ ~~text~~" + assert convert("-some- -text-") == "~~some~~ ~~text~~" + assert convert("--text--") == "-~~text~~-" + assert convert("--some----text--") == "-~~some~~--~~text~~-" + class TestUnderline: def test_basic_conversion(self): @@ -97,6 +105,12 @@ def test_match_end_conditions(self): def test_multiline(self): assert convert("??multiline\nunderline??") == "??multiline\nunderline??" + def test_adjacent_tokens(self): + assert convert("??some????text??") == "some text" + assert convert("??some?? ??text??") == "some text" + assert convert("????text????") == "??text??" + assert convert("????some????????text????") == "??some????text??" 
+ class TestSuperscript: def test_basic_conversion(self): @@ -118,6 +132,12 @@ def test_match_end_conditions(self): def test_multiline(self): assert convert("^multiline\nunderline^") == "^multiline\nunderline^" + def test_adjacent_tokens(self): + assert convert("^some^^text^") == "some text" + assert convert("^some^ ^text^") == "some text" + assert convert("^^text^^") == "^text^" + assert convert("^^some^^^^text^^") == "^some^^text^" + class TestSubscript: def test_basic_conversion(self): @@ -139,6 +159,12 @@ def test_match_end_conditions(self): def test_multiline(self): assert convert("~multiline\nunderline~") == "~multiline\nunderline~" + def test_adjacent_tokens(self): + assert convert("~some~~text~") == "some text" + assert convert("~some~ ~text~") == "some text" + assert convert("~~text~~") == "~text~" + assert convert("~~some~~~~text~~") == "~some~~text~" + class TestColor: def test_color_value(self):