fixup! fix(jira2markdown): process windows line breaks as a Unix line breaks
catcombo · Oct 7, 2024 · 863ec80 · 863ec80
1 parent 6252a47
commit 863ec80
Show file tree

Hide file tree

Showing 10 changed files with 58 additions and 30 deletions.
diff --git a/jira2markdown/markup/advanced.py b/jira2markdown/markup/advanced.py
@@ -58,8 +58,10 @@ def action(self, tokens: ParseResults) -> str:
         else:
             prefix = ""
 
-        text = self.markup.transform_string("\n".join([line.lstrip() for line in tokens.text.strip().splitlines()]))
-        return prefix + "\n".join([f"> {line}" for line in text.splitlines()])
+        text = self.markup.transform_string(
+            "".join([line.lstrip() for line in tokens.text.strip().splitlines(keepends=True)])
+        )
+        return prefix + "".join([f"> {line}" for line in text.splitlines(keepends=True)])
 
     @property
     def expr(self) -> ParserElement:

diff --git a/jira2markdown/markup/lists.py b/jira2markdown/markup/lists.py
@@ -63,7 +63,7 @@ def __init__(self, nested_token: str, nested_indent: int, tokens: str, indent: i
         self.indent_state = ListIndentState()
 
     def action(self, tokens: ParseResults) -> str:
-        result = []
+        result = ""
 
         for line in tokens:
             bullets, text = line.split(" ", maxsplit=1)
@@ -77,15 +77,12 @@ def action(self, tokens: ParseResults) -> str:
 
             line_padding = " " * count
             item_padding = " " * (count - self.indent) + self.bullet + " "
-            text = self.markup.transform_string(text).splitlines() or [""]
+            text = self.markup.transform_string(text).splitlines(keepends=True) or [""]
 
-            result.append(
-                "\n".join([item_padding + line if i == 0 else line_padding + line for i, line in enumerate(text)]),
-            )
+            result += "".join([item_padding + line if i == 0 else line_padding + line for i, line in enumerate(text)])
 
         self.indent_state.reset()
-        text_end = "\n" if (tokens[-1][-1] == "\n") else ""
-        return "\n".join(result) + text_end
+        return result
 
     @property
     def expr(self) -> ParserElement:
@@ -118,7 +115,7 @@ def action(self, tokens: ParseResults) -> str:
         result = super().action(tokens)
         first_line = (result.splitlines() or [""])[0].strip()
 
-        # Text with dashed below it turns into a heading. To prevent this
+        # Text with dashes below it turns into a heading. To prevent this
         # add a line break before an empty list.
         if first_line == "-":
             return "\n" + result

diff --git a/jira2markdown/markup/tables.py b/jira2markdown/markup/tables.py
@@ -25,16 +25,18 @@ class Table(AbstractMarkup):
     is_inline_element = False
 
     def action(self, tokens: ParseResults) -> str:
-        lines = [line for line in tokens if len(line) > 0]
-        max_columns_count = max(len(row) for row in tokens)
+        eol = tokens[0].eol or "\n"
+        stripped_tokens = [row[:-1] if row[-1] == eol else row for row in tokens]
+        max_columns_count = max(len(row) for row in stripped_tokens)
+        lines = [row for row in stripped_tokens if len(row) > 0]
 
         # Converts multiline text to one line,
         # because markdown doesn't support multiline text in table cells
         output = [
             "|"
             + "|".join(
                 map(
-                    lambda cell: cell.replace("\n", "<br>"),
+                    lambda cell: cell.replace(eol, "<br>"),
                     map(self.markup.transform_string, row),
                 ),
             )
@@ -48,7 +50,7 @@ def action(self, tokens: ParseResults) -> str:
         # Insert header delimiter after the first row
         output.insert(1, "|" + "---|" * max(max_columns_count, 1))
 
-        return "\n".join(output) + "\n"
+        return eol.join(output) + eol
 
     @property
     def expr(self) -> ParserElement:
@@ -62,14 +64,18 @@ def expr(self) -> ParserElement:
             | Mention(**self.init_kwargs).expr
         )
 
-        ROW = SEP + ZeroOrMore(
-            SkipTo(SEP | ROW_BREAK, ignore=IGNORE) + Optional(SEP),
-            stop_on=ROW_BREAK | NL + ~SEP,
+        ROW = (
+            SEP
+            + ZeroOrMore(
+                SkipTo(SEP | ROW_BREAK, ignore=IGNORE) + Optional(SEP),
+                stop_on=ROW_BREAK | NL + ~SEP,
+            )
+            + UniversalLineEnd().set_results_name("eol")
         )
 
         EMPTY_LINE = LineStart() + Optional(Regex(r"[ \t]+", flags=re.UNICODE)) + UniversalLineEnd()
         return (
             (StringStart() ^ Optional(EMPTY_LINE, default="\n"))
-            + OneOrMore(LineStart() + Group(ROW) + NL).set_parse_action(self.action)
+            + OneOrMore(LineStart() + Group(ROW)).set_parse_action(self.action)
             + (StringEnd() | Optional(EMPTY_LINE, default="\n"))
         )
diff --git a/jira2markdown/markup/text_effects.py b/jira2markdown/markup/text_effects.py
@@ -53,7 +53,7 @@ def expr(self) -> ParserElement:
         ELEMENT = Combine(
             TOKEN
             + (~White() & ~Char(self.TOKEN))
-            + SkipTo(TOKEN, ignore=IGNORE, fail_on="\n")
+            + SkipTo(TOKEN, ignore=IGNORE, fail_on=UniversalLineEnd())
             + TOKEN
             + FollowedBy(NON_ALPHANUMS | StringEnd()),
         )
@@ -173,8 +173,10 @@ def expr(self) -> ParserElement:
 
 class BlockQuote(AbstractMarkup):
     def action(self, tokens: ParseResults) -> str:
-        text = self.markup.transform_string("\n".join([line.lstrip() for line in tokens[0].strip().splitlines()]))
-        return "\n".join([f"> {line}" for line in text.splitlines()])
+        text = self.markup.transform_string(
+            "".join([line.lstrip() for line in tokens[0].strip().splitlines(keepends=True)])
+        )
+        return "".join([f"> {line}" for line in text.splitlines(keepends=True)])
 
     @property
     def expr(self) -> ParserElement:

diff --git a/jira2markdown/tokens.py b/jira2markdown/tokens.py
@@ -9,7 +9,7 @@ def __init__(self):
     def parseImpl(self, instring, loc, do_actions=True) -> ParseImplReturnType:
         if loc < len(instring):
             if instring.startswith("\r\n", loc):
-                return loc + 2, "\n"
+                return loc + 2, "\r\n"
             elif instring[loc] in ("\n", "\r"):
                 return loc + 1, "\n"
             else:

diff --git a/tests/markup/test_headings.py b/tests/markup/test_headings.py
@@ -12,3 +12,9 @@ def test_header_levels(self):
     def test_match_start_conditions(self):
         assert convert("  h2. Title") == "  ## Title"
         assert convert(" A  h2. Title") == " A  h2. Title"
+
+    def test_windows_line_breaks(self):
+        assert (
+            convert("Line before heading\r\nh1. Title text\r\nLine after heading")
+            == "Line before heading\r\n# Title text\r\nLine after heading"
+        )
diff --git a/tests/markup/test_lists.py b/tests/markup/test_lists.py
@@ -318,5 +318,5 @@ def test_list_indent(self):
     def test_windows_line_breaks(self):
         assert (
             convert("Line before list\r\n * Bulleted item 1\r\n * Bulleted item 2\r\n\r\nLine after list")
-            == "Line before list\n- Bulleted item 1\n- Bulleted item 2\n\nLine after list"
+            == "Line before list\r\n- Bulleted item 1\r\n- Bulleted item 2\r\n\r\nLine after list"
         )
diff --git a/tests/markup/test_mixed_content.py b/tests/markup/test_mixed_content.py
@@ -182,8 +182,8 @@ def render_expected(self, expected, text):
             return expected[0] % text
 
         first_line, next_line = expected
-        return "\n".join(
-            [first_line % line if i == 0 else next_line % line for i, line in enumerate(text.splitlines())]
+        return "".join(
+            [first_line % line if i == 0 else next_line % line for i, line in enumerate(text.splitlines(keepends=True))]
         )
 
     def test_headings(self, token, test_input, expected):

diff --git a/tests/markup/test_tables.py b/tests/markup/test_tables.py
@@ -148,10 +148,10 @@ def test_windows_line_breaks(self):
             "|cell 2-1|cell 2-2|cell 2-3|\r\n\r\n"
             "text after table"
         ) == (
-            "text before table:\n\n"
-            "|header 1|header 2|header 3|\n"
-            "|---|---|---|\n"
-            "|cell 1-1|cell 1-2|cell 1-3|\n"
-            "|cell 2-1|cell 2-2|cell 2-3|\n\n"
+            "text before table:\r\n\r\n"
+            "|header 1|header 2|header 3|\r\n"
+            "|---|---|---|\r\n"
+            "|cell 1-1|cell 1-2|cell 1-3|\r\n"
+            "|cell 2-1|cell 2-2|cell 2-3|\r\n\r\n"
             "text after table"
         )
diff --git a/tests/markup/test_text_effects.py b/tests/markup/test_text_effects.py
@@ -23,6 +23,9 @@ def test_match_end_conditions(self):
     def test_multiline(self):
         assert convert("*multiline\nbold*") == "\\*multiline\nbold\\*"
 
+    def test_multiline_windows_line_breaks(self):
+        assert convert("*multiline\r\nbold*") == "\\*multiline\r\nbold\\*"
+
     def test_single_token(self):
         assert convert("single *char") == r"single \*char"
 
@@ -229,6 +232,12 @@ def test_adjacent_text(self):
 """
         )
 
+    def test_windows_line_breaks(self):
+        assert (
+            convert("Preceding line\r\nbq. First quote\r\nbq. Second quote\r\nNext line")
+            == "Preceding line\r\n> First quote\r\n> Second quote\r\n\nNext line"
+        )
+
 
 class TestBlockQuote:
     def test_basic_conversion(self):
@@ -247,6 +256,12 @@ def test_basic_conversion(self):
 """
         )
 
+    def test_windows_line_breaks(self):
+        assert (
+            convert("\r\n{quote}\r\nhere is quotable\r\ncontent to be quoted\r\n{quote}\r\n")
+            == "\r\n> here is quotable\r\n> content to be quoted\r\n"
+        )
+
 
 class TestMonospaced:
     def test_basic_conversion(self):