@** Palm Markup Language Generation.

This filter translates parsed body copy (emitted by |etextBodyParserFilter|)
into Palm Markup Language source code, which it passes down the pipeline.

@<Class definitions@>=
class PalmGenerationFilter : public textFilter {
private:@/
    // Text processing modes and document state flags
    bool italics, inmath, quoth, hastitle, hasauthor,
         infoot, intable, firstchap;
    string htitle;                          // Title of document
    string hauthor;                         // Author of document
    string chapnumber;                      // Chapter number from text
    string chapname;                        // Chapter name
    string partext;                         // Paragraph accumulation string
    int parline;                            // Paragraph line counter
    int chapno;                             // Chapter number (for anonymous chapters)
    int footnum;                            // Footnote number
    int footnest;                           // Footnote nesting level
    string footnotes;                       // Footnotes saved for output at end
    string footpar;                         // Footnote paragraph accumulator
    string footsave;                        // Save paragraph during footnote accumulation
    bool fitalics, fquoth;                  // Text processing modes saved during footnote
    textSubstituter transformer;            // Text substituter for substitute specials

    string quotePalmString(string s);

    // Remove the leading spaces from |s|.  The caller is expected to
    // pass a nonempty line.  A line consisting only of spaces yields
    // the empty string; handing |string::npos| to |substr| would
    // otherwise raise an exception.
    static string pruneIndent(string s) {
        assert(s != "");
        const string::size_type first = s.find_first_not_of(' ');

        if (first == string::npos) {
            return "";
        }
        return s.substr(first);
    }

    // Output one line.  While |infoot| is set the line, followed by a
    // newline, is appended to |footnotes| instead of being passed to
    // the base class |emit|.
    virtual void emit(string s, textComponent *destination = NULL) {
        if (infoot) {
            footnotes += s + "\n";
        } else {
            textFilter::emit(s, destination);
        }
    }

    // Translate |s| with |quotePalmString| and output the result.
    void emitq(string s) {
        emit(quotePalmString(s));
    }

    void generateFilledParagraph(bodyState state, string envtype,
        char bracket, string text);

    void generateAlignedParagraph(bodyState state, string envtype,
        char bracket, string text);

    bool isSubstitution(string cmd);

public:@/
    // Start with every mode flag off and every counter at zero, so no
    // member is read before it has a defined value.
    PalmGenerationFilter() {
        italics = inmath = quoth = false;
        hastitle = hasauthor = false;
        intable = firstchap = infoot = false;
        fitalics = fquoth = false;
        footnest = footnum = 0;
        chapno = 0;
        parline = 0;
    }

    virtual ~PalmGenerationFilter() {
    }

    string componentName(void) {
        return "PalmGenerationFilter";
    }

    void put(string s);
};

@
The |put| method of the PalmGenerationFilter wraps Palm Markup Language commands
around the line-level structure of the text to achieve the
desired formatting.  Since almost all of the real work is
done upstream (by |etextBodyParserFilter|) and downstream
(by |quotePalmString|) there is relatively little that needs
doing here.

@<Class definitions@>=
// Accept one line from upstream and dispatch on its body state.
// The first character of |s| is decoded into the state, the second
// is the bracket code (compared in this file against |Begin|, |Body|,
// |End| and |Void|), and the remainder is the text of the line.
void PalmGenerationFilter::put(string s) {
    bodyState state = DecodeBodyState(s[0]);
    char bracket = s[1];
    string text = s.substr(2);

    // Each named section expanded below ends with its own |break|,
    // so control does not run on into the following case.
    switch (state) {
	case BeginText:
	    @<Generate start of document in Palm@>;
	    
	case Declarations:
	    @<Process declarations in Palm@>;

	case DocumentTitle:
	    @<Process document title in Palm@>;

	case Author:
	    @<Process author in Palm@>;

	case ChapterNumber:
	    @<Process chapter number in Palm@>;

	case ChapterName:
	    @<Process chapter name in Palm@>;

	// Filled paragraphs; the second argument is the markup wrapped
	// around the paragraph, empty for ordinary body copy.
	case InTextParagraph:
    	    generateFilledParagraph(state, "", bracket, text);
	    break;

	case InBlockQuote:
    	    generateFilledParagraph(state, "\\t", bracket, text);
	    break;

	// Aligned paragraphs, output line by line.
	case InRaggedRight:
	    generateAlignedParagraph(state, "", bracket, text);
	    break;

	case InRaggedLeft:
	    generateAlignedParagraph(state, "\\r", bracket, text);
	    break;

	// |intable| is set from the |Begin| bracket until after the |End|
	// bracket has been processed; |quotePalmString| tests it to
	// disable most of its translations inside tables.
    	case InPreformattedTable:
	    if (bracket == Begin) {
	    	intable = true;
	    }
	    generateAlignedParagraph(state, "", bracket, text);
	    if (bracket == End) {
	    	intable = false;
	    }
	    break;

	case InCentred:
	    generateAlignedParagraph(state, "\\c", bracket, text);
	    break;

	case EndOfText:
	    if (footnum > 0) {
	    	emit(footnotes);    	    // Append footnotes to document
	    }
	    // The reported total adds one line for each newline held
	    // in |footnotes| to the count from |getLineNumber|.
	    if (verbose) {
	    	cerr << "Palm: " <<
		    	(getLineNumber() + count(footnotes.begin(), footnotes.end(), '\n')) <<
			" lines output.\n";
	    }
	    break;

	// A state with no case above is reported and the program exits.
	default:
	    cerr << "*** State " << stateNames[state] << " " << bracket <<
		" not handled in PalmGenerationFilter ***\n";
	    exit(1);
    }
}

@
Generate the boilerplate at the start of a Palm Markup Language document.

@<Generate start of document in Palm@>=
    {
        // |ctime| ends its result with a newline; copy everything
        // except that final character.
        const time_t now = time(NULL);
        const string rawstamp = ctime(&now);
        const string stamp(rawstamp, 0, rawstamp.length() - 1);

        emit("\\v Translated by " PRODUCT " " VERSION " (" REVDATE ") on " + stamp + "\\v");
    }
    break;

@
Declarations are output before the start of the body, allowing them to
be used for special title generation, if desired.  Declarations are
an excellent place to define any substitutions to be applied to the
subsequent text.

@<Process declarations in Palm@>=
    if (bracket == Body) {
        // Only special lines are expected within declarations.
        assert(etextBodyParserFilter::isLineSpecial(text));

        // A substitution definition is absorbed by |isSubstitution|;
        // any other special command is passed down the pipeline.
        const bool consumed = isSubstitution(text);

        if (!consumed) {
            emit(etextBodyParserFilter::specialCommand(text));
        }
    }
    break;


@
We save the document title, concatenating into a single line if
it spans two or more in the input text.  It will eventually be
used to declare the document database name and on the title
page of the output document.

@<Process document title in Palm@>=
    switch (bracket) {
    	case Begin:
	    htitle = "";
	    break;
	    
	case Body:
	    if (htitle != "") {
	    	htitle += " ";
	    }
	    htitle += quotePalmString(pruneIndent(text));
	    hastitle = true;
	    break;

	case Void:
	    hastitle = false;
	    htitle = "";
	    break;
    }
    break;

@
The author name is accumulated, concatenating multiple lines as
required. When we see the |End| bracket for the author
specification (or the |Void| bracket if no author is given), we
write the document header.  If no document title was specified,
the user will have to supply the name of the Palm database
when the PML file is compiled into a Palm Reader book.

@<Process author in Palm@>=
    switch (bracket) {
    	case Begin:
	    hauthor = "";
	    break;
	    
	case Body:
	    if (hauthor != "") {
	    	hauthor += " ";
	    }
	    hauthor += quotePalmString(pruneIndent(text));
	    break;

	case End:
	    hasauthor = true;@/@,
	    // Note fall-through

	case Void:
	    if (hastitle) {
    	    	emit("\\vTITLE=\"" + htitle + "\"\\v");
    	    	emit("\\c\\b" + htitle + "\\b");
	    	emit("\\c");
	    }
	    if (hasauthor) {
    	    	emit("\\c" + hauthor);
	    	emit("\\c");
	    }
	    break;
    }
    break;

@
We save the chapter number for output after the chapter name
is received.  The chapter number may span multiple lines.

@<Process chapter number in Palm@>=
    switch (bracket) {
	case Begin:
	    chapnumber = "";
	    break;

	case Body:
	    if (chapnumber != "") {
	    	chapnumber += " ";
	    }
	    chapnumber += quotePalmString(pruneIndent(text));
	    break;

	case Void:
	    chapnumber = "";@/@,
	    // Note fall-through
	    
	case End:
	    break;
    }
    break;

@
Chapter names cause \.{\\x} chapter tags to be generated with the
chapter title as its argument.  If only a chapter number is given,
it is used as the chapter title.  If both a number and name are
specified, they are concatenated with a colon after the
number and the resulting string is used as the chapter title.
|Void| chapter names generate
chapters numbered $1,2,\ldots n$.

@<Process chapter name in Palm@>=
    switch (bracket) {
	case Begin:
	    chapname = "";
	    break;

	case Body:
    	    if (chapname != "") {
    	    	chapname += " ";
    	    }
	    chapname += quotePalmString(pruneIndent(text));
	    break;

	case Void:
	    chapname = "";@/@,
	    // Note fall-through
	    
	case End:
	    chapno++;
	    emit("");
	    if ((chapname != "") || (chapnumber != "")) {
	    	string s = "\\x\\b";
		
	    	if (chapnumber != "") {
		    s += chapnumber;
		    if (chapname != "") {
		    	s += ": ";
		    }
		}
		emit(s + chapname + "\\b\\x");
	    } else {
	    	ostringstream numchap;
		
		numchap << "\\x\\b\\a151 " << chapno << "\\a151\\b\\x";
		emit(numchap.str());
	    }
	    break;
    }
    break;

@
The |generateFilledParagraph| function handles paragraphs with
text which flows from line to line to fill the page.  It is
used for normal body copy and indented block quotations, which
differ only in that the latter are wrapped by \.{\\t} markup
tags, passed as the |envtype| argument.  Existing indentation
on argument lines is discarded, and lines of the paragraph are
joined into one line per paragraph as required in PML.

@<Class definitions@>=
// Assemble a filled paragraph in |partext| and output it as one
// line at the |End| bracket.  |envtype| is placed before the first
// line of text and again after the last.  The |state| argument is
// not used here.
void PalmGenerationFilter::generateFilledParagraph(bodyState state, string envtype,
    	char bracket, string text) {
    if (bracket == Begin) {
        // Blank line before the paragraph; reset quote matching.
        emit("");
        quoth = false;
        partext = "";
        return;
    }

    if (bracket == End) {
        emit(partext + envtype);
        return;
    }

    if (bracket != Body) {
        return;                     // |Void|: nothing to do
    }

    if (etextBodyParserFilter::isLineSpecial(text)) {
        // Substitution definitions are absorbed; any other special
        // command is added to the paragraph as it stands.
        if (!isSubstitution(text)) {
            partext += etextBodyParserFilter::specialCommand(text);
        }
        return;
    }

    // Translate first: |quotePalmString| may set or clear |infoot|,
    // which decides where the translated text belongs.
    const string quoted = quotePalmString(pruneIndent(text));

    if (infoot) {
        if (!footpar.empty()) {
            footpar += ' ';
        }
        footpar += quoted;
    } else {
        if (partext.empty()) {
            partext = envtype;
        } else {
            partext += ' ';
        }
        partext += quoted;
    }
}

@
This function handles the various kinds of aligned paragraphs we encounter
in a document.  It wraps the contents of the paragraph in a
Palm Markup Language environment of the type specified by |envtype|.
The indentation used in the input text to identify the alignment of
the copy is removed, as indentation is significant in PML\null.
Preformatted tables are a special case; to make the most of limited
screen space, we normally strip the two leading spaces present
on lines of such tables.  If for some strange reason the input
document introduces a table with a line which begins in column 3
but a subsequent line of the table contains a nonblank in columns
1 or 2, that line will be output in its entirety.  This will
misalign the table, but it's better than discarding characters
in the input text.

@<Class definitions@>=
// Output an aligned paragraph one line at a time.  |envtype| is
// placed before the first line output and is emitted on a line of
// its own at the |End| bracket.
void PalmGenerationFilter::generateAlignedParagraph(bodyState state, string envtype,
    	char bracket, string text) {
    if (bracket == Begin) {
        // Blank line before the paragraph; reset quote matching.
        emit("");
        quoth = false;
        parline = 0;
        return;
    }

    if (bracket == End) {
        emit(envtype);
        return;
    }

    if (bracket != Body) {
        return;                     // |Void|: nothing to do
    }

    // Until a line has actually been output, lead with |envtype|.
    string line = (parline == 0) ? envtype : string();

    if (etextBodyParserFilter::isLineSpecial(text)) {
        if (isSubstitution(text)) {
            return;                 // Substitution definitions produce no output
        }
        line += etextBodyParserFilter::specialCommand(text);
    } else {
        string quoted;

        if (state == InPreformattedTable) {
            // Drop the two leading spaces of a table line if both
            // are present; otherwise keep the line whole.
            const string::size_type skip = (text.compare(0, 2, "  ") == 0) ? 2 : 0;

            quoted = quotePalmString(text.substr(skip));
        } else {
            quoted = quotePalmString(pruneIndent(text));
        }

        // |infoot| is tested after translation, which may change it.
        if (infoot) {
            if (!footpar.empty()) {
                footpar += ' ';
            }
            footpar += quoted;
            return;
        }
        line += quoted;
    }

    emit(line);
    parline++;
}

@
Translate text string |s| into PML, quoting metacharacters
and expanding Latin-1 characters to decimal escapes.  Italic
mode, conversion of ASCII quotes to open and close quotes,
ellipsis and em-dash translation, mathematics mode, and
footnote processing are performed at this level.  The
handling of footnotes which span multiple lines in the
input text interacts in subtle ways with
|generateFilledParagraph| and |generateAlignedParagraph|---don't
make any structural changes in footnote handling here
unless you completely grasp the implications for
callers of this function.

@<Class definitions@>=
// Translate the line |s| one character at a time, building the
// result in |o|.  The sections expanded in the loop may advance |cp|
// past extra characters, change the mode flags, and (for footnotes)
// replace the contents of |o|.  Substitutions registered with
// |transformer| are applied to the finished string before it is
// returned.
string PalmGenerationFilter::quotePalmString(string s)
{
    string::iterator cp;
    string o = "";
    int c;
    static const string punctuation = "?!:;";	// Punctuation set after space for |frenchPunct|
    
    for (cp = s.begin(); cp < s.end(); cp++) {
	// Mask so that |c| is always in the range 0 to 255.
    	c = (*cp) & 0xFF;
	
	if (c < ' ') {
	    @<Quote control character in Palm@>;
	} else if ((c >= 160) && (c <= 255)) {
	    @<Quote ISO 8859-1 character in Palm@>;
	} else if (c >= ' ' && c <= '~') {
	    // Printable ASCII.  The tests are tried in order and the
	    // first to match wins.  Inside a table only backslash
	    // doubling applies; inside mathematics, only that and the
	    // recognition of the math delimiters.
            if (!inmath && !intable && c == '_') {
	    	@<Toggle italic text mode in Palm@>;
	    } else if (!intable && c == '\\' &&
	    	       ((cp + 1) < s.end()) && ((cp[1] == '(') || (cp[1] == ')'))) {
	    	@<Toggle math mode in Palm@>;
	    } else if (c == '\\') {
		o += "\\\\";
	    // A footnote opens at a left bracket, or at one or two
	    // spaces followed by a left bracket.  Note that the test
	    // requires at least two more characters on the line.
	    } else if (!inmath && !intable && ((cp + 2) < s.end()) &&
	    	       ((c == '[') || ((c == ' ') && (cp[1] == '[')) ||
                                   ((c == ' ') && (cp[1] == ' ') && (cp[2] == '[')))) {
		@<Begin footnote in Palm@>;
	    } else if (!inmath && !intable && c == ']') {
	    	@<End footnote in Palm@>;
	    } else if (!inmath && !intable && (c == '-') && ((cp + 1) < s.end()) && (cp[1] == '-')) {
	    	@<Translate em-dash in Palm@>;
    	    } else if (!inmath && !intable && (c == '.') && ((cp + 2) < s.end()) &&
	    	    	(cp[1] == '.') && (cp[2] == '.')) {
	    	@<Translate ellipsis in Palm@>;
	    } else if (!inmath && !intable && c == '"') {
	    	@<Convert ASCII quotes to open and close quotes in Palm@>;
	    } else {
	    	@<Output ASCII text character in Palm@>;
	    }
	}
        /* Note that other characters, specifically those in the
           range from 127 through 159, get dropped. */
    }
    o = transformer.substitute(o);  	    // Apply substitutions, if any
    return o;
}

@
This is a control character.  Emit as \.{\caret}{\it letter} unless it is
considered as white space (for example, carriage return and
line feed), in which case it's sent directly to the output.
	       
@<Quote control character in Palm@>=
   if (!isspace(c)) {
        // Not white space: show as a caret followed by the letter
        // obtained by adding the character code to the at sign.
        o += '^';
        o += static_cast<char>('@@' + c);
   } else {
        // White space control characters are copied unchanged.
        o += static_cast<char>(c);
   }
    
@
Palm Markup Language requires that all non-ASCII characters, even those
part of the ISO 8859-1 character set, be quoted using the
$\backslash$\.{a}{\it xxx} escape sequence.  We handle this
here.  In addition, if |frenchPunct| is enabled, we must check
for guillemets and insert the requisite non-breaking spaces
to set them off from the text.

@<Quote ISO 8859-1 character in Palm@>=
    // Escape sequence for the character: its code as three decimal
    // digits, zero filled.
    ostringstream isochar;

    isochar << "\\a" << setw(3) << setfill('0') << c;

    if (!inmath && frenchPunct && (c == C_LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK) &&
        ((cp + 1) < s.end()) && (!isspace((cp[1] & 0xFF)))) {
        // Opening guillemet followed directly by text: add a
        // non-breaking space after it.  The test on |cp + 1| makes
        // sure a following character exists before |cp[1]| is read;
        // |cp| itself can never equal |s.end()| inside the loop.
        o += isochar.str();
        o += "\\a160";
    } else if (!inmath && frenchPunct && (c == C_RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK) &&
        (cp != s.begin())) {
        // Closing guillemet not at the start of the line: add a
        // non-breaking space before it.
        o += "\\a160";
        o += isochar.str();
    } else {
        o += isochar.str();
    }

@
The underscore character, ``\.{\_}'', toggles text between the normal
roman and italic fonts.
  
@<Toggle italic text mode in Palm@>=
    italics = !italics;
    if (italics) {
        o += "\\i";
    } else {
        o +="\\i";
    }

@
PML doesn't support mathematics. When we encounter mathematics
in the text, we simply output it as text.  The user can,
afterward, convert the equation to an image using
\TeX to{\tt GIF} or an equivalent tool and insert the image.
In the meanwhile, we simply encode the \LaTeX\ equation
to text, quoting special characters as required.  Even though
we don't directly support mathematics, we need to know
when we're in it, since characters such as ``\.{\uline}''
and ``\.{[}'' are regular text characters, not markup,
in math mode.
    
@<Toggle math mode in Palm@>=
    // Emit the backslash of the delimiter in quoted form; the
    // parenthesis itself is handled on the next trip round the loop.
    o += "\\\\";
    // An opening parenthesis enters math mode, a closing one leaves it.
    inmath = (cp[1] == '(');

@
Footnotes are represented by a number enclosed in square
brackets, linked to the footnote at the end of the document,
which has a link back to the text.  At the start of a footnote
we append the footnote mark to the output accumulation string
|o|, then save it in |footsave|, setting |infoot| to indicate
we're accumulating a footnote.  While |infoot| is set, |emit|
diverts output to the string |footnotes|, where it is
simply concatenated at the end.  This string will eventually
be appended to the end of the output document when we
reach the end of the input text.

We don't allow footnotes to be nested.  If the user attempts to
nest footnotes, we issue a warning and simply emit the
nested footnote in-line (within the outer footnote), enclosed
in square brackets.

@<Begin footnote in Palm@>=
    // Track the bracket depth; only the outermost bracket opens a
    // real footnote.
    footnest++;
    if (footnest > 1) {
    	issueMessage("Cannot nest footnotes in Palm Markup Language output.");
	o += "[";
    } else {
    	ostringstream flink;
	
	// Add the footnote mark, built from the new footnote number,
	// to the text accumulated so far.
    	footnum++;
	flink << "\\Q=\"b" << footnum << "\"\\q=\"#f" << footnum << "\"[" << footnum << "]\\q";
	o += flink.str();
	// Set aside the text so far and the italic and quote modes,
	// then start afresh for the body of the footnote.
	footsave = o;
    	infoot = true;
	fitalics = italics;
	fquoth = quoth;
	italics = quoth = false;
	o = "";
	footpar = "";
	// With |infoot| now set, the |emit| calls below append to
	// |footnotes| rather than writing to the output.
	if (footnum == 1) {
	    emit("\\x\\a185 \\a178 \\a179 \\a133\\x"); // Footnote chapter: ``$^1$~$^2$~$^3\ldots$''
	}
	// Reuse the stream to build the line which starts this footnote.
	flink.str("");
	flink << "\\p\\Q=\"f" << footnum << "\"";
	emit(flink.str());
    }
    // If the match began at a space, step over the one or two spaces
    // so that |cp| rests on the bracket; the loop increment then
    // moves past the bracket itself.
    if ((c == ' ') && ((cp + 1) < s.end())) {
        if (cp[1] == ' ') {
            cp++;
        }
        cp++;
    }

@
Close a footnote when the right bracket is encountered.
The footnote paragraph, assembled in |footpar| with the
cooperation of the caller of |quotePalmString| if the
footnote spans multiple lines, is appended to the
|footnotes| string by calling |emit| while |infoot|
remains set.  Following the footnote a back link to the
body copy where the footnote appeared is generated.
    
@<End footnote in Palm@>=
    // A right bracket with no footnote open is reported and dropped.
    if (footnest == 0) {
    	issueMessage("Mismatched end of footnote (\"]\") bracket.");
    } else {
    	footnest--;
	if (footnest > 0) {
    	    o += ']';   	// Nested footnote---just emit closing bracket
	} else {
	    ostringstream blink;
	    
	    // Add what this line contributed to the footnote to the
	    // text gathered from any earlier lines.
	    if (o != "") {
	    	if (footpar != "") {
		    footpar += ' ';
		}
		footpar += o;
	    }
	    // |infoot| is still set, so these |emit| calls append to
	    // |footnotes|: first the numbered footnote text, then the
	    // link back to the mark in the body copy.
	    blink << "\\b" << footnum << ".\\b  ";
	    emit(blink.str() + footpar);
	    blink.str("");
	    blink << "\\c\\l\\q=\"#b" << footnum << "\"<<<\\q\\l";
	    emit(blink.str());
	    emit("\\c");
	    // Leave footnote mode, restoring the modes and the body
	    // text that were set aside when the footnote began.
    	    infoot = false;
	    italics = fitalics;
	    quoth = fquoth;
	    o = footsave;
	}
    }

@
Two adjacent hyphens, ``\.{-}\.{-}'' denote an {\it em} dash in
an ASCII Etext.  Translate this sequence into the em-dash symbol
used by PML.

@<Translate em-dash in Palm@>=
    o += "\\a151";
    cp++;

@
Three consecutive periods are translated into a Palm
ellipsis character.
    
@<Translate ellipsis in Palm@>=
   // Step over the second and third periods, then append the
   // ellipsis escape.
   cp += 2;
   o.append("\\a133");
    
@
ASCII quote characters are translated into open and close quote symbols.
Note that the flag |quoth| is unconditionally reset at the end of
a paragraph so that mismatched quotes won't propagate beyond one paragraph.
This allows continued quotes in multiple paragraphs to work properly.
We also save and restore |quoth| around footnotes so quote
matching works when a footnote appears within quotes.

@<Convert ASCII quotes to open and close quotes in Palm@>=
    o += quoth ? "\\a148" : "\\a147";
    quoth = !quoth;
    
@
Output a text character.  Some Palm Markup Language metacharacters require
backslash quoting in any mode, others only when not in math mode.  PML
specifies that only a single space appear after punctuation; we suppress
multiple spaces here except when generating a preformatted table.

@<Output ASCII text character in Palm@>=
   // Decide whether this is punctuation to be set off with a
   // non-breaking space: one of the |punctuation| characters which
   // ends the line or is followed by a space, a closing guillemet,
   // or a comma.
   bool setOff = false;

   if (!inmath && frenchPunct && (punctuation.find_first_of(c) != string::npos)) {
        if ((cp + 1) == s.end()) {
            setOff = true;
        } else {
            const int next = cp[1] & 0xFF;

            setOff = (next == ' ') ||
                     (next == C_RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK) ||
                     (next == ',');
        }
   }

   if (setOff) {
        o += "\\a160";
        o += c;
   } else if (intable || (c != ' ') || (o == "") || (o[o.length() - 1] != ' ')) {
        // Outside tables, a space is dropped when the output
        // already ends with one.
        o += c;
   }

@
Special commands which define text substitutions are recognised by
this method, parsed, and added to the substitution list.

@<Class definitions@>=
// Test whether the special line |cmd| is a substitution definition,
// that is, whether its command begins with the word Substitute and
// a space.  If so, the rest is parsed as a delimiter character, the
// text to match, the delimiter, the replacement, and the delimiter
// again, and the pair is registered with |transformer|.  A
// definition which cannot be parsed is reported but still counts as
// a substitution.  Returns |true| for any substitution command.
bool PalmGenerationFilter::isSubstitution(string cmd)
{
    static const string keyword = "Substitute ";
    string s = etextBodyParserFilter::specialCommand(cmd);

    if (s.compare(0, keyword.length(), keyword) != 0) {
        return false;
    }
    s = s.substr(keyword.length());

    // Positions are held as |string::size_type| so that they compare
    // correctly against |string::npos|.  A narrower unsigned type
    // truncates |npos| and the failure tests below never succeed.
    bool bogus = true;
    const string::size_type n = s.find_first_not_of(' ');

    if (n != string::npos) {
        const char delim = s[n];
        const string::size_type m = s.find(delim, n + 1);

        if (m != string::npos) {
            const string::size_type l = s.find(delim, m + 1);

            if (l != string::npos) {
                bogus = false;
                transformer.addSubstitution(s.substr(n + 1, (m - n) - 1),
                    s.substr(m + 1, (l - m) - 1));
            }
        }
    }

    if (bogus) {
        issueMessage("Invalid Palm Substitute special", cerr);
        issueMessage(auditFilter::quoteArbitraryString(cmd), cerr);
    }
    return true;
}
