001    package com.saelist.stx.parser;
002    
003    import java.io.*;
004    import java.util.*;
005    import org.apache.log4j.*;
006    import com.saelist.stx.*;
007    import com.saelist.util.Strings;
008    import com.saelist.stx.operators.*;
009    
010    
011    /** Parses a text that is structured by indentation. <p>
012      * Each level of indentation must be exactly two spaces.
013      * The only significant characters are leading spaces and new-lines. Other
014      * characters don't influence the parsing at all. However, a warning is
015      * logged if the ends with a tab. Any escaping,
016      * trimming and so on must be done on the resulting Pair hierarchy. That
017      * is what @{link com.saelist.stx.operator.Operator}s are for.
018      *
019      */
020    public class LstxParser {
021    
022      private static final int MAX_LINE = 2048;
023      private static int lines;
024    
025      public static Logger logger = Logger.getLogger(LstxParser.class);
026    
027      /** Parses <code>text</code> into a structure of @link{com.saelist.stx.Pair}s
028        * based on the indentation of the text.
029        * @param text to be parsed.
030        * @return a pseudo-root containing all the top level entries in text.
031        */
032      public static Pair parse(String text) {
033        try {
034          return parse(new StringReader(text));
035        } catch(IOException e) {
036          // Shouldn't happen on a StringReader
037          throw new RuntimeException(e);
038        }
039      }
040    
041      /** Parses the text from <code>file</code> into a structure of @link{com.saelist.stx.Pair}s
042        * based on the indentation of the text.
043        * @param file contains text to be parsed.
044        * @return a pseudo-root containing all the top level entries in text.
045        */
046      public static Pair parse(File file) throws IOException {
047        return parse(new BufferedReader(new FileReader(file)));
048      }
049    
050      /** Parses the text from <code>in</code> into a structure of @link{com.saelist.stx.Pair}s
051        * based on the indentation of the text.
052        * @param in provides the text to be parsed.
053        * @return a pseudo-root containing all the top level entries in text.
054        */
055      public static Pair parse(Reader in) throws IOException {
056        lines = 0;
057        Pair root = new Pair(null, -2, "root", -1, -2);
058        fillChildren(root, new PushbackReader(in, MAX_LINE));
059        return root;
060      }
061    
062      /** Parses indented text into a structure of pairs. Each level of indentation
063        * must be excatly 2 spaces and with no tabs. Empty Lines are skipped.
064        * @throws IOException on problems with <code>in</code>.
065        * @throws RuntimException if a line is indented with a TAB or if indentation
066        * is inconsistent.
067        */
068      private static Pair fillChildren(Pair pair, PushbackReader in) throws IOException {
069        String line = readLn(in);
070        while(line != null) {
071    
072          int indent = indentOf(line);
073          if(line.trim().equals(""))
074            /* pair.add(new Pair(pair, indent, line.substring(indent), lines, indent + 1)) */;
075          else if(indent == (pair).getIndent() + 2)
076            pair.add(fillChildren(new Pair(pair, indent, line.substring(indent), lines, indent + 1), in));
077          else if(indent <= (pair).getIndent()) {
078            in.unread('\n');
079            in.unread(line.toCharArray());
080            lines--;
081            return pair;
082          } else
083            throw new RuntimeException("Inconsistent indentation in line " + lines + ", column=" + indent + ", text='" + line + "'.");
084          line = readLn(in);
085        }
086        return pair;
087      }
088    
089    
090      /** @return the number of spaces at the beginning of the <code>line</code>.*/
091      private static int indentOf(String line) {
092        for(int i = 0; i < line.length(); i++) {
093          if(line.charAt(i) == '\t')
094            // throw new RuntimeException();
095            logger.warn("indentOf(): Encounterd TAB in line=" + lines + ", column=" + i + ", text='" + line + "'.");
096          if(line.charAt(i) != ' ')
097            return i;
098        }
099        return 0;
100      }
101    
102      /** Read the next line from <code>in</code>.
103        * @return the next line in <code>in</code> (without the newline) or null if
104        * no characters remain.
105        */
106      private static String readLn(Reader in) throws IOException {
107        StringBuffer result = new StringBuffer();
108        int ch = in.read();
109        if(ch < 0)
110          return null;
111        while(ch >= 0 && ch != '\n') {
112          result.append((char) ch);
113          ch = in.read();
114        }
115        lines++;
116        return result.toString();
117      }
118    
119      /** Writer the <code>pair</code> to <code>out</code>.
120        * @param out receives the result.
121        * @param pair to be written.
122        * @throws IOException on problems with <code>out</code>.
123        */
124      public static void unParse(Writer out, Pair pair) throws IOException {
125        unParse(out, pair, false);
126      }
127    
128      /** Writes the <code>pair</code> to <code>out</code>, joining single children
129        * on the parents' line after a "=".
130        * @param out receives the result.
131        * @param pair to be written.
132        * @param join weather to append the text of single children (with no
133        * siblings or children of their own) to their parents' line after a "=".
134        * @throws IOException on problems with <code>out</code>.
135        */
136      public static void unParse(Writer out, Pair pair, boolean join) throws IOException {
137        for(Iterator it = pair.getPairs(); it.hasNext(); )
138          unParse(out, (Pair) it.next(), 0, join);
139        out.flush();
140      }
141    
142      /** Writes the <code>pair</code> to <code>out</code>, joining single children
143        * on the parents' line after a "=".
144        * @throws IOException on problems with <code>out</code>.
145        * @param out receives the result.
146        * @param pair to be written.
147        * @param join weather to append the text of single children (with no
148        * siblings or children of their own) to their parents' line after a "=".
149        */
150      private static void unParse(Writer out, Pair pair, int indent, boolean join) throws IOException {
151        // out.write("|");
152        for(int i=0; i < indent; i++)
153          out.write(" ");
154        out.write(pair.getText());
155        if(join && (pair.size() == 1) && (pair.get(0).size() == 0)) {
156          out.write("=");
157          out.write(pair.getValue());
158          out.write("\n");
159        } else {
160          out.write("\n");
161          for(Iterator it = pair.getPairs(); it.hasNext(); )
162            unParse(out, (Pair) it.next(), indent + 2, join);
163        }
164      }
165    
166      /** Split on '=' and remove comments. */
167      public static void applyBasicProfile(Pair pair) {
168    
169        // Split on "=" before testing
170    
171        Operator splitter = new SplitOperator();
172        splitter.init(LstxParser.parse(
173            "config\n" +
174            "  pattern\n" +
175            "    =\n").get("config"));
176        splitter.visit(pair.select(".//*"));
177    
178        // Remove comments and preceding whitespace.
179    
180        Operator decommentor = new ReplaceOperator();
181        decommentor.init(LstxParser.parse(
182            "config\n" +
183            "  pattern\n" +
184            "    \\s*#.*\n").get("config"));
185        decommentor.visit(pair.select(".//*"));
186    
187      }
188    
189    
190    }