-
Notifications
You must be signed in to change notification settings - Fork 2
/
txt2pdf.py
251 lines (208 loc) · 8.93 KB
/
txt2pdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""txt2pdf - Convert one or more text files to a PDF document using LaTeX typesetting.
This tool was developed for typesetting novels and short story collections written in pure text files.
The goal is to keep the text files as clean as possible with a minimum of structured formatting, i.e. a harsh subset of Markdown.
"""
import os
import sys
import argparse
import subprocess
import tempfile
import shutil
import glob
import re
import jinja2
import langdetect
# Exception classes.
class InvalidInputTxtFileException(Exception):
    """Raised when an input text file cannot be opened or read.

    Attributes:
        filename: Path of the offending file, or None when unknown.
    """

    def __init__(self, filename=None):
        super().__init__(filename)
        self.filename = filename

    def __str__(self):
        # __str__ must always return a str; returning None (as happened when
        # no filename was given) makes str(exc) raise TypeError.
        if self.filename is None:
            return ""
        return str(self.filename)
class ErrorWhenExecutingLatexException(Exception):
    """Raised when the LaTeX process exits with a non-zero return code.

    Attributes:
        completed_process: The object returned by ``subprocess.run`` for the
            failed LaTeX invocation, or None when unavailable.
    """

    def __init__(self, completed_process=None):
        super().__init__(completed_process)
        self.completed_process = completed_process

    def __str__(self):
        # __str__ must return a str. The captured stderr may be missing
        # (None) or raw bytes (pipes opened without text mode), so both cases
        # are normalised here instead of being returned as-is.
        if self.completed_process is None:
            return ""
        stderr = self.completed_process.stderr
        if stderr is None:
            return ""
        if isinstance(stderr, bytes):
            return stderr.decode("utf-8", errors="replace")
        return str(stderr)
TEX_ITALIC_PRE_STR = r"\emph{"
TEX_ITALIC_POST_STR = r"}"

# NOTE(review): the original test was `paragraph[0:4] == <whitespace>`; the
# slice width of four implies a four-space indent marks a verse line. Confirm
# against the upstream file, where the literal may have been whitespace-collapsed.
_VERSE_INDENT = "    "
_QUOTE_PREFIX = "> "


def _convert_emphasis(content):
    """Replace alternating underscores with LaTeX emphasis open/close markers."""
    pieces = content.split('_')
    converted = [pieces[0]]
    for index, piece in enumerate(pieces[1:], start=1):
        # Odd-numbered underscores open an emphasis span, even ones close it.
        converted.append(TEX_ITALIC_PRE_STR if index % 2 else TEX_ITALIC_POST_STR)
        converted.append(piece)
    if len(pieces) % 2 == 0:
        # An odd number of underscores leaves the last span unterminated.
        print("Warning: Unfinished emphasized text marker!", file=sys.stderr)
    return "".join(converted)


def convert_simplified_markdown_to_latex(content):
    """Convert one simplified-Markdown text into a title and a LaTeX body.

    The first line of ``content`` is the title; the remaining lines form the
    body. Supported markup:

    * ``_text_`` becomes emphasized text.
    * ``[[ ... ]]`` spans (within a single line) are removed.
    * Lines indented by four spaces are grouped into a ``verse`` environment.
    * Lines starting with ``"> "`` are grouped into a ``quote`` environment.
    * A line consisting of a single ``*`` becomes vertical space.

    Args:
        content: The full text of one input file.

    Returns:
        A ``(title, body)`` tuple of strings. Empty input yields ``("", "")``.
    """
    # Pre-process text to make it properly LaTeX formatted.
    content = content.replace('"', "''")  # proper citation marks
    content = content.replace(' - ', ' -- ')  # proper LaTeX dash
    content = content.replace('&', r'\&')  # proper '&' character
    content = content.replace('#', r'\#')  # proper '#' character
    content = re.sub(r'\[\[.*?\]\]', '', content)  # remove [[ ... ]] spans
    content = re.sub(r"\n- ", r"\n-- ", content)  # dash at beginning of line

    # Convert text between underscores to emphasized formatting.
    content = _convert_emphasis(content)

    paragraphs = content.splitlines()
    if not paragraphs:
        # Empty input has neither a title nor a body.
        return ("", "")
    title = paragraphs[0]

    new_paragraphs = []
    verse = False
    quote = False
    for paragraph in paragraphs[1:]:
        if paragraph.startswith(_VERSE_INDENT):
            if not verse:
                new_paragraphs.append('\\begin{verse}')
                verse = True
            new_paragraphs.append(paragraph.lstrip() + '\\\\')
            continue
        if verse:
            new_paragraphs.append('\\end{verse}')
            verse = False

        if paragraph.startswith(_QUOTE_PREFIX):
            quoted_text = paragraph[len(_QUOTE_PREFIX):]
            if not quote:
                new_paragraphs.append('\\begin{quote}')
                new_paragraphs.append(quoted_text)
                quote = True
            else:
                # Later quote lines are separated by an explicit line break.
                new_paragraphs.append('\\\\' + quoted_text)
            continue
        if quote:
            new_paragraphs.append('\\end{quote}')
            quote = False

        if paragraph == "*":
            new_paragraphs.append('\\vspace{6mm}')
        else:
            new_paragraphs.append(paragraph)

    # Close environments still open when the text ends on a verse or quote
    # line; otherwise the generated LaTeX has an unbalanced \begin.
    if verse:
        new_paragraphs.append('\\end{verse}')
    if quote:
        new_paragraphs.append('\\end{quote}')

    return (title, "\n".join(new_paragraphs))
def preprocess_input(args):
    """Build the template metadata dictionary from parsed command-line arguments.

    Each source file is read, converted with
    ``convert_simplified_markdown_to_latex`` and stored as one chapter. The
    document language is detected from the content of the first source file.

    Args:
        args: Parsed arguments providing ``title``, ``author``, ``sources``,
            ``basepath`` and ``wide_line_spacing``.

    Returns:
        A dict with the keys ``title``, ``author``, ``multiple_chapters``,
        ``wide_line_spacing``, ``chapters``, ``is_windows`` and ``language``.

    Raises:
        InvalidInputTxtFileException: If a source file cannot be read.
    """
    metadata = {}
    # --title / --author are optional and are None when omitted; joining None
    # would raise TypeError, so fall back to an empty word list.
    metadata['title'] = ' '.join(args.title or [])
    metadata['author'] = ' '.join(args.author or [])
    metadata['multiple_chapters'] = len(args.sources) > 1
    metadata['wide_line_spacing'] = args.wide_line_spacing
    metadata['chapters'] = []
    metadata['is_windows'] = (os.name == 'nt')

    basepath = args.basepath if args.basepath is not None else '../'

    lang_detect_input = None
    for source in args.sources:
        path = os.path.join(basepath, source)
        try:
            with open(path, 'r') as f:
                content = f.read()
        except IOError as err:
            raise InvalidInputTxtFileException(path) from err

        if lang_detect_input is None:
            # Use content from first input to detect language.
            lang_detect_input = content

        (chapter_title, chapter_content) = convert_simplified_markdown_to_latex(content)
        metadata['chapters'].append({'title': chapter_title, 'content': chapter_content})

    # Map detected language codes to the names expected by the template.
    language_dict = {
        'sv': 'swedish',
        'en': 'english'
    }
    language = None
    if lang_detect_input:
        # Detection is skipped when there is no text to analyse at all.
        detected_language = langdetect.detect(lang_detect_input)
        language = language_dict.get(detected_language)
        if language is None:
            print("Add support for more languages!", file=sys.stderr)
    metadata['language'] = language
    return metadata
def find_latex_binary():
    """Locate the pdflatex executable.

    The executable search path (PATH) is consulted first. On POSIX systems
    ``/usr/bin/pdflatex`` is also accepted; on Windows the Program Files
    directories and the user's home directory are searched recursively.

    Returns:
        The command or path to run pdflatex with, or None when it cannot be
        found or the platform is neither POSIX nor Windows.
    """
    on_path = shutil.which("pdflatex")

    if os.name == "posix":
        # Installs outside /usr/bin (e.g. /usr/local/bin) are found through
        # PATH; the bare command name is enough for subprocess to resolve it.
        if on_path is not None or os.path.isfile("/usr/bin/pdflatex"):
            return "pdflatex"
        return None

    if os.name == "nt":
        if on_path is not None:
            # Avoids the slow recursive directory scans below.
            return on_path
        search_roots = (
            "c:/Program Files",
            "c:/Program Files (x86)",
            os.path.expanduser("~"),
        )
        for root in search_roots:
            matches = glob.iglob(root + "/**/pdflatex.exe", recursive=True)
            first_match = next(matches, None)
            if first_match is not None:
                return first_match
        return None

    return None
def generate_latex_source(metadata, tex_path):
    """Render ``template.tex`` with ``metadata`` and write the result to ``tex_path``.

    The template is loaded from the directory containing this script.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    loader = jinja2.FileSystemLoader(script_dir)
    environment = jinja2.Environment(
        loader=loader,
        trim_blocks=True,
        lstrip_blocks=True,
    )
    template = environment.get_template('template.tex')
    with open(tex_path, 'w') as output:
        rendered = template.render(metadata)
        output.write(rendered)
def generate_pdf_output(latex_bin, tex_path, temp_dir):
    """Run LaTeX on ``tex_path``, writing its output files into ``temp_dir``.

    Raises:
        ErrorWhenExecutingLatexException: If LaTeX exits with a non-zero
            return code; the completed process is attached to the exception.
    """
    # The option spelling differs between the Windows and non-Windows branch.
    if os.name == 'nt':
        options = ["-quiet", "-halt-on-error", "-output-directory", temp_dir]
    else:
        options = ["-halt-on-error", "-output-directory={}".format(temp_dir)]

    command = [latex_bin, tex_path] + options
    completed = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if completed.returncode != 0:
        raise ErrorWhenExecutingLatexException(completed)
if __name__ == '__main__':
    # Command-line entry point: locate LaTeX, convert the inputs, render the
    # .tex file in a temporary directory, run LaTeX and copy the PDF to the
    # current working directory.
    parser = argparse.ArgumentParser(description='Create a PDF out of one or more text files.')
    parser.add_argument('sources', metavar='path', type=str, nargs='+', help='text file(s)')
    parser.add_argument('--title', nargs='+', help='Document title.')
    parser.add_argument('--author', nargs='+', help='Document author.')
    parser.add_argument('--basepath', help='Base path of text file(s).')
    parser.add_argument('--output', default='nameless', help='Output filename.')
    parser.add_argument('--wide-line-spacing', action='store_true', help='Extra wide line spacing.')
    args = parser.parse_args()

    # Tracked outside the try block so the finally clause can remove the
    # directory on every exit path, including unexpected exceptions.
    temp_dir = None
    try:
        print("Looking for LaTeX binaries on your system...")
        latex_bin = find_latex_binary()
        if latex_bin is None:
            print("Could not find LaTeX on your system!")
            sys.exit(1)

        print("Processing input...")
        metadata = preprocess_input(args)

        temp_dir = tempfile.mkdtemp()
        tex_path = os.path.join(temp_dir, args.output + ".tex")
        output_filename = args.output + ".pdf"
        pdf_path = os.path.join(temp_dir, output_filename)

        print("Generating intermediate files...")
        generate_latex_source(metadata, tex_path)

        print("Generating PDF '{}'...".format(output_filename))
        generate_pdf_output(latex_bin, tex_path, temp_dir)
        # The PDF lives in the temporary directory; copy it out before the
        # directory is removed, otherwise the result is lost.
        shutil.copy(pdf_path, "./")

        print("Cleaning up intermediate files...")
    except InvalidInputTxtFileException as e:
        print("Couldn't find input file '{}'!".format(e.filename), file=sys.stderr)
        sys.exit(1)
    except ErrorWhenExecutingLatexException as e:
        print("Something bad happened during LaTeX execution!\n")
        stderr_output = e.completed_process.stderr
        if stderr_output is not None:
            if isinstance(stderr_output, bytes):
                # Captured pipes deliver bytes; decode for readable output.
                stderr_output = stderr_output.decode("utf-8", errors="replace")
            print(stderr_output)
        print("Copying generated tex file and log file for debugging...\n")
        for extension in (".log", ".tex"):
            debug_file = os.path.join(temp_dir, args.output + extension)
            # LaTeX may have failed before writing a log file.
            if os.path.isfile(debug_file):
                shutil.copy(debug_file, "./")
        sys.exit(1)
    finally:
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
#except:
# print("Unexpected error: {}".format(sys.exc_info()[0]))
# print(str(sys.exc_info()[2]))