Skip to content

Commit

Permalink
Condense regex
Browse files Browse the repository at this point in the history
  • Loading branch information
nntrn committed Sep 30, 2024
1 parent 08e238e commit 519994c
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 16 deletions.
2 changes: 1 addition & 1 deletion scripts/annotations.jq
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def create_activity_data:
created: .ZANNOTATIONCREATIONDATE,
location: .ZANNOTATIONLOCATION,
cfi: (.ZANNOTATIONLOCATION|[match("\\b[0-9]{1,4}\\b";"g").string|tonumber]|join(".")),
chapter: (if ((.ZFUTUREPROOFING5|length)>0) then .ZFUTUREPROOFING5 else (.ZANNOTATIONLOCATION|format_location) end),
chapter: (if ((.ZFUTUREPROOFING5|length)>0) then .ZFUTUREPROOFING5 else (.ZANNOTATIONLOCATION|format_location)? // .rangestart end),
rangestart: .ZPLLOCATIONRANGESTART
}) | sort_by(.id);

Expand Down
31 changes: 16 additions & 15 deletions scripts/books.jq
Original file line number Diff line number Diff line change
Expand Up @@ -74,32 +74,33 @@ def identify_subsection:
elif test("[Pp][rologue]{2,}") then "Prologue"
elif test("[gG][losary]{2,}") then "Glossary"
elif test("[aA][fterword]{3,}") then "Afterword"
elif test("[aA][pendix]{3,}";"i") then "APPENDIX \($string|gsub("^[0-9]";""))"
elif test("[aA][pendix]{3,}";"i") then "Appendix"
elif test("[fF][ront]{2,}") then "Front"
elif test("^[A-Z0-9 ]+$") then ($string |gsub("[^0-9]";"")|tonumber|tostring)
else null
end
;

# getnum($str): extract the last whole number found in $str and return it as
# a string with leading zeros dropped (e.g. "ch_007" -> "7").
#
# The filter's own input (.) is ignored; only $str is examined.
# When $str contains no digits the result is the string "null", because
# `last` of an empty array is null and `tostring` renders it literally.
def getnum($str):
  # Put a space between any non-digit and the digit run that follows it, so
  # the word-boundary match below also fires after "_" or a letter.
  ($str | gsub("(?<w>[^0-9])(?<n>[0-9]+)"; .w + " " + .n)) as $spaced
  | [ $spaced | match("(\\b[0-9]+)"; "g") | .string | tonumber ]
  | last
  | tostring
;

# format_chapter: reduce raw location fragments to a short chapter label.
#
# Input : a string, or an (optionally nested) array of strings. The fragments
#         are flattened, de-duplicated (and thereby sorted) with `unique`,
#         and joined with single spaces before any matching happens.
# Output: exactly one string:
#   - "Chapter <n>" when the joined text matches the chapter pattern and
#     contains at least one digit (<n> comes from getnum);
#   - "<n>" when the text contains "toc_marker" and at least one digit;
#   - otherwise the joined text with file-type / navigation tokens and any
#     leading non-alphanumeric characters removed.
def format_chapter:
  ([.] | flatten | unique) as $input
  | ( $input
      | join(" ")
      # Strip long numeric ids: 8 or more digits, optionally prefixed by "x".
      # The replacement is an explicit empty string; the pattern has no named
      # capture to substitute.
      | gsub("x?[0-9]{8,}"; ""; "x")
    ) as $str
  # getnum yields the string "null" when there is no digit to extract, so the
  # numbered branches are only taken when a digit is actually present.
  | ($str | test("[0-9]")) as $has_num
  | $str
  | if $has_num and test("[cC][chapter]{1,}") then
      # NOTE(review): "[chapter]" is a character class, so this pattern is
      # loose - it also matches e.g. the "ct" in "section". Confirm whether a
      # stricter chapter pattern is wanted.
      "Chapter " + getnum($str)
    elif $has_num and test("toc_marker") then
      getnum($str)
    else
      gsub("(epub|EPUB|[[:punct:]]xhtml|html|ji[0-9]+|sup[0-9]+|nav_|div[0-9]+)"; "")
      | gsub("^[^a-zA-Z0-9]+"; "")
    end
;

# format_location: derive a display label from a location string.
#
# Input : a string containing one or more "[...]" bracketed segments.
# Output: the label produced by identify_subsection when that filter returns
#         a non-null value, otherwise the result of format_chapter applied to
#         the list of bracketed segments. In both cases runs of "_" / "-" are
#         replaced by a single space and surrounding whitespace is trimmed.
def format_location:
  # Collect the text inside every [...] group.
  [ match("\\[([^\\]]+)\\]+"; "g") | .captures[] | .string ] as $segments
  | ($segments | join(" ") | identify_subsection) as $label
  # Prefer the recognised subsection name; fall back to chapter formatting.
  | ($label // ($segments | format_chapter))
  | gsub("[_-]+"; " "; "x")
  | gsub("^[\\s]+|[\\s]+$"; "")
;

0 comments on commit 519994c

Please sign in to comment.