001 package com.saelist.stx.parser;
002
003 import java.io.*;
004 import java.util.*;
005 import org.apache.log4j.*;
006 import com.saelist.stx.*;
007 import com.saelist.util.Strings;
008 import com.saelist.stx.operators.*;
009
010
011 /** Parses a text that is structured by indentation. <p>
012 * Each level of indentation must be exactly two spaces.
013 * The only significant characters are leading spaces and new-lines. Other
014 * characters don't influence the parsing at all. However, a warning is
015 * logged if the ends with a tab. Any escaping,
016 * trimming and so on must be done on the resulting Pair hierarchy. That
017 * is what @{link com.saelist.stx.operator.Operator}s are for.
018 *
019 */
020 public class LstxParser {
021
022 private static final int MAX_LINE = 2048;
023 private static int lines;
024
025 public static Logger logger = Logger.getLogger(LstxParser.class);
026
027 /** Parses <code>text</code> into a structure of @link{com.saelist.stx.Pair}s
028 * based on the indentation of the text.
029 * @param text to be parsed.
030 * @return a pseudo-root containing all the top level entries in text.
031 */
032 public static Pair parse(String text) {
033 try {
034 return parse(new StringReader(text));
035 } catch(IOException e) {
036 // Shouldn't happen on a StringReader
037 throw new RuntimeException(e);
038 }
039 }
040
041 /** Parses the text from <code>file</code> into a structure of @link{com.saelist.stx.Pair}s
042 * based on the indentation of the text.
043 * @param file contains text to be parsed.
044 * @return a pseudo-root containing all the top level entries in text.
045 */
046 public static Pair parse(File file) throws IOException {
047 return parse(new BufferedReader(new FileReader(file)));
048 }
049
050 /** Parses the text from <code>in</code> into a structure of @link{com.saelist.stx.Pair}s
051 * based on the indentation of the text.
052 * @param in provides the text to be parsed.
053 * @return a pseudo-root containing all the top level entries in text.
054 */
055 public static Pair parse(Reader in) throws IOException {
056 lines = 0;
057 Pair root = new Pair(null, -2, "root", -1, -2);
058 fillChildren(root, new PushbackReader(in, MAX_LINE));
059 return root;
060 }
061
062 /** Parses indented text into a structure of pairs. Each level of indentation
063 * must be excatly 2 spaces and with no tabs. Empty Lines are skipped.
064 * @throws IOException on problems with <code>in</code>.
065 * @throws RuntimException if a line is indented with a TAB or if indentation
066 * is inconsistent.
067 */
068 private static Pair fillChildren(Pair pair, PushbackReader in) throws IOException {
069 String line = readLn(in);
070 while(line != null) {
071
072 int indent = indentOf(line);
073 if(line.trim().equals(""))
074 /* pair.add(new Pair(pair, indent, line.substring(indent), lines, indent + 1)) */;
075 else if(indent == (pair).getIndent() + 2)
076 pair.add(fillChildren(new Pair(pair, indent, line.substring(indent), lines, indent + 1), in));
077 else if(indent <= (pair).getIndent()) {
078 in.unread('\n');
079 in.unread(line.toCharArray());
080 lines--;
081 return pair;
082 } else
083 throw new RuntimeException("Inconsistent indentation in line " + lines + ", column=" + indent + ", text='" + line + "'.");
084 line = readLn(in);
085 }
086 return pair;
087 }
088
089
090 /** @return the number of spaces at the beginning of the <code>line</code>.*/
091 private static int indentOf(String line) {
092 for(int i = 0; i < line.length(); i++) {
093 if(line.charAt(i) == '\t')
094 // throw new RuntimeException();
095 logger.warn("indentOf(): Encounterd TAB in line=" + lines + ", column=" + i + ", text='" + line + "'.");
096 if(line.charAt(i) != ' ')
097 return i;
098 }
099 return 0;
100 }
101
102 /** Read the next line from <code>in</code>.
103 * @return the next line in <code>in</code> (without the newline) or null if
104 * no characters remain.
105 */
106 private static String readLn(Reader in) throws IOException {
107 StringBuffer result = new StringBuffer();
108 int ch = in.read();
109 if(ch < 0)
110 return null;
111 while(ch >= 0 && ch != '\n') {
112 result.append((char) ch);
113 ch = in.read();
114 }
115 lines++;
116 return result.toString();
117 }
118
119 /** Writer the <code>pair</code> to <code>out</code>.
120 * @param out receives the result.
121 * @param pair to be written.
122 * @throws IOException on problems with <code>out</code>.
123 */
124 public static void unParse(Writer out, Pair pair) throws IOException {
125 unParse(out, pair, false);
126 }
127
128 /** Writes the <code>pair</code> to <code>out</code>, joining single children
129 * on the parents' line after a "=".
130 * @param out receives the result.
131 * @param pair to be written.
132 * @param join weather to append the text of single children (with no
133 * siblings or children of their own) to their parents' line after a "=".
134 * @throws IOException on problems with <code>out</code>.
135 */
136 public static void unParse(Writer out, Pair pair, boolean join) throws IOException {
137 for(Iterator it = pair.getPairs(); it.hasNext(); )
138 unParse(out, (Pair) it.next(), 0, join);
139 out.flush();
140 }
141
142 /** Writes the <code>pair</code> to <code>out</code>, joining single children
143 * on the parents' line after a "=".
144 * @throws IOException on problems with <code>out</code>.
145 * @param out receives the result.
146 * @param pair to be written.
147 * @param join weather to append the text of single children (with no
148 * siblings or children of their own) to their parents' line after a "=".
149 */
150 private static void unParse(Writer out, Pair pair, int indent, boolean join) throws IOException {
151 // out.write("|");
152 for(int i=0; i < indent; i++)
153 out.write(" ");
154 out.write(pair.getText());
155 if(join && (pair.size() == 1) && (pair.get(0).size() == 0)) {
156 out.write("=");
157 out.write(pair.getValue());
158 out.write("\n");
159 } else {
160 out.write("\n");
161 for(Iterator it = pair.getPairs(); it.hasNext(); )
162 unParse(out, (Pair) it.next(), indent + 2, join);
163 }
164 }
165
166 /** Split on '=' and remove comments. */
167 public static void applyBasicProfile(Pair pair) {
168
169 // Split on "=" before testing
170
171 Operator splitter = new SplitOperator();
172 splitter.init(LstxParser.parse(
173 "config\n" +
174 " pattern\n" +
175 " =\n").get("config"));
176 splitter.visit(pair.select(".//*"));
177
178 // Remove comments and preceding whitespace.
179
180 Operator decommentor = new ReplaceOperator();
181 decommentor.init(LstxParser.parse(
182 "config\n" +
183 " pattern\n" +
184 " \\s*#.*\n").get("config"));
185 decommentor.visit(pair.select(".//*"));
186
187 }
188
189
190 }