Browse code

Initial commit

asilvestre authored on 12/12/2012 15:43:12
Showing 10 changed files
... ...
@@ -1,4 +1,4 @@
1 1
 jpurexml
2 2
 ========
3 3
 
4
-XML parser in pure Java code so tools like Google's PlayN can transcompile it to all its target platforms
5 4
\ No newline at end of file
5
+Simple all-in-memory XML parser in pure Java code so tools like Google's PlayN can transcompile it to all its target platforms
6 6
\ No newline at end of file
7 7
new file mode 100644
... ...
@@ -0,0 +1,107 @@
1
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3
+	<modelVersion>4.0.0</modelVersion>
4
+	<groupId>com.github.asilvestre</groupId>
5
+	<artifactId>jpurexml</artifactId>
6
+	<version>0.9</version>
7
+	<packaging>jar</packaging>
8
+	<name>jpurexml</name>
9
+	<url>https://github.com/asilvestre/jpurexml</url>
10
+	<description>Simple all-in-memory XML parser coded in pure Java code so tools like Google's PlayN can transcompile it to all its target platforms</description>
11
+
12
+	<licenses>
13
+		<license>
14
+			<name>The Apache Software License, Version 2.0</name>
15
+			<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
16
+			<distribution>repo</distribution>
17
+		</license>
18
+	</licenses>
19
+
20
+	<scm>
21
+		<url>scm:git:https://github.com/asilvestre/jpurexml.git</url>
22
+		<connection>scm:git:https://github.com/asilvestre/jpurexml.git</connection>
23
+	</scm>
24
+
25
+	<developers>
26
+		<developer>
27
+			<id>asilvestre</id>
28
+			<name>Antoni Silvestre</name>
29
+			<email>antoni.silvestre@gmail.com</email>
30
+		</developer>
31
+	</developers>
32
+
33
+	<properties>
34
+		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
35
+		<github.global.server>github</github.global.server>
36
+	</properties>
37
+
38
+	<build>
39
+		<plugins>
40
+			<plugin>
41
+				<groupId>org.apache.maven.plugins</groupId>
42
+				<artifactId>maven-compiler-plugin</artifactId>
43
+				<configuration>
44
+					<source>1.5</source>
45
+					<target>1.5</target>
46
+				</configuration>
47
+			</plugin>
48
+			<plugin>
49
+				<groupId>com.github.github</groupId>
50
+				<artifactId>site-maven-plugin</artifactId>
51
+				<version>0.7</version>
52
+				<configuration>
53
+					<message>Creating Javadocs for ${project.version}</message>
54
+					<repositoryName>JavaXmlToJson</repositoryName>
55
+					<repositoryOwner>antonisilvestre</repositoryOwner>
56
+				</configuration>
57
+				<executions>
58
+					<execution>
59
+						<goals>
60
+							<goal>site</goal>
61
+						</goals>
62
+						<phase>site</phase>
63
+					</execution>
64
+				</executions>
65
+			</plugin>
66
+			<plugin>
67
+				<groupId>org.apache.maven.plugins</groupId>
68
+				<artifactId>maven-gpg-plugin</artifactId>
69
+				<executions>
70
+					<execution>
71
+						<id>sign-artifacts</id>
72
+						<phase>verify</phase>
73
+						<goals>
74
+							<goal>sign</goal>
75
+						</goals>
76
+					</execution>
77
+				</executions>
78
+			</plugin>
79
+		</plugins>
80
+	</build>
81
+
82
+	<reporting>
83
+		<plugins>
84
+			<plugin>
85
+				<groupId>org.apache.maven.plugins</groupId>
86
+				<artifactId>maven-javadoc-plugin</artifactId>
87
+				<version>2.9</version>
88
+				<configuration>
89
+				</configuration>
90
+			</plugin>
91
+		</plugins>
92
+	</reporting>
93
+
94
+	<dependencies>
95
+		<dependency>
96
+			<groupId>junit</groupId>
97
+			<artifactId>junit</artifactId>
98
+			<version>3.8.1</version>
99
+			<scope>test</scope>
100
+		</dependency>
101
+		<dependency>
102
+			<groupId>com.github.github</groupId>
103
+			<artifactId>site-maven-plugin</artifactId>
104
+			<version>0.7</version>
105
+		</dependency>
106
+	</dependencies>
107
+</project>
0 108
new file mode 100644
... ...
@@ -0,0 +1,48 @@
1
+/**
2
+ * Copyright Antoni Silvestre
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5
+ * use this file except in compliance with the License. You may obtain a copy of
6
+ * the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13
+ * License for the specific language governing permissions and limitations under
14
+ * the License.
15
+ */
16
+
17
+package com.github.asilvestre.jpurexml;
18
+
19
/**
 * Generic utilities for the XML parser and the JSON converter.
 */
public class Utils {

	/**
	 * Simple replace string function. String.replaceAll uses regular
	 * expressions internally and might not transcompile correctly to all
	 * platforms, so the replacement is done with plain index searches.
	 * <p>
	 * Occurrences are matched from left to right and do not overlap; text
	 * inserted by a replacement is never searched again.
	 * 
	 * @param input
	 *            string to search in
	 * @param toReplace
	 *            literal text to look for; when empty, input is returned
	 *            unchanged
	 * @param replacement
	 *            literal text to put in place of each occurrence
	 * @return input with every occurrence of toReplace substituted by
	 *         replacement
	 */
	public static String ReplaceStr(String input, String toReplace, String replacement)
	{
		// An empty search string matches at every index, so indexOf would
		// never report -1 and the loop below would never finish
		if (toReplace.length() == 0)
		{
			return input;
		}

		int matchPos = input.indexOf(toReplace);
		if (matchPos == -1)
		{
			// Nothing to replace, avoid copying the string
			return input;
		}

		// Build the result in a single pass instead of re-creating the whole
		// string for every occurrence
		StringBuilder res = new StringBuilder(input.length());
		int copyFrom = 0;
		while (matchPos != -1)
		{
			res.append(input.substring(copyFrom, matchPos));
			res.append(replacement);

			copyFrom = matchPos + toReplace.length();
			matchPos = input.indexOf(toReplace, copyFrom);
		}

		// Whatever is left after the last occurrence
		res.append(input.substring(copyFrom));

		return res.toString();
	}

}
0 49
new file mode 100644
... ...
@@ -0,0 +1,40 @@
1
+/**
2
+ * Copyright Antoni Silvestre
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5
+ * use this file except in compliance with the License. You may obtain a copy of
6
+ * the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13
+ * License for the specific language governing permissions and limitations under
14
+ * the License.
15
+ */
16
+
17
+package com.github.asilvestre.jpurexml;
18
+
19
+/**
20
+ * Main object that describes an XML document
21
+ */
22
+public class XmlDoc {
23
+	/**
24
+	 * Has the XML prologue, that is the initial '<?xml' tag with its version
25
+	 * and encoding
26
+	 */
27
+	public XmlPrologue prologue = new XmlPrologue();
28
+
29
+	/**
30
+	 * Has the root tag for the XML document
31
+	 */
32
+	public XmlTag root = new XmlTag();
33
+
34
+	@Override
35
+	public String toString() {
36
+		String res = String.format("%s%s", prologue.toString(), root.toString());
37
+
38
+		return res;
39
+	}
40
+}
0 41
new file mode 100644
... ...
@@ -0,0 +1,40 @@
1
+/**
2
+ * Copyright Antoni Silvestre
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5
+ * use this file except in compliance with the License. You may obtain a copy of
6
+ * the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13
+ * License for the specific language governing permissions and limitations under
14
+ * the License.
15
+ */
16
+
17
+package com.github.asilvestre.jpurexml;
18
+
19
/**
 * Exception thrown when the XML text cannot be parsed. Besides the message it
 * carries the position in the parsed text the error refers to.
 */
public class XmlParseException extends Exception {

	private static final long serialVersionUID = 1906896722521922104L;

	/**
	 * Position in the parsed text the error refers to
	 */
	private final int pos;

	/**
	 * @param msg
	 *            description of the problem; the exception message will be
	 *            this text followed by " at " and the position
	 * @param pos
	 *            position in the parsed text the error refers to
	 */
	public XmlParseException(String msg, int pos)
	{
		// Plain concatenation rather than String.format: same text, no
		// dependency on the formatter (or on the default locale's digits)
		super(msg + " at " + pos);

		this.pos = pos;
	}

	/**
	 * @return the position passed when the exception was created
	 */
	public int GetPos()
	{
		return pos;
	}

}
0 41
new file mode 100644
... ...
@@ -0,0 +1,875 @@
1
+/**
2
+ * Copyright Antoni Silvestre
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5
+ * use this file except in compliance with the License. You may obtain a copy of
6
+ * the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13
+ * License for the specific language governing permissions and limitations under
14
+ * the License.
15
+ */
16
+
17
+package com.github.asilvestre.jpurexml;
18
+
19
+import java.util.HashMap;
20
+import java.util.LinkedList;
21
+import java.util.TreeSet;
22
+
23
+/**
24
+ * Class to hold static methods to parse an XML string.
25
+ */
26
+public class XmlParser {
27
+
28
+	/**
29
+	 * Parse a String with an XML into an XmlDoc object
30
+	 * 
31
+	 * @param xml
32
+	 * @throws XmlParseException
33
+	 */
34
+	public static XmlDoc parseXml(String xml) throws XmlParseException {
35
+		XmlDoc res = new XmlDoc();
36
+
37
+		// First of all removing all the comments from the XML
38
+		String procXml = removeComments(xml);
39
+
40
+		// Parsing XML prologue
41
+		int prologEnd = parsePrologue(procXml, res.prologue);
42
+
43
+		// Parsing the XML body
44
+		parseTag(procXml, prologEnd, res.root);
45
+
46
+		return res;
47
+	}
48
+
49
+	/**
50
+	 * Remove XML comments
51
+	 * 
52
+	 * @param xml
53
+	 * @return input XML without comments
54
+	 */
55
+	private static String removeComments(String xml) throws XmlParseException {
56
+		String res = xml;
57
+
58
+		int commentPos = res.indexOf("<!--");
59
+		while (commentPos != -1) {
60
+			int commentEnd = res.indexOf("-->");
61
+
62
+			if (commentEnd == -1) {
63
+				throw new XmlParseException("Missing comment ending '-->'", commentPos);
64
+			}
65
+
66
+			res = String.format("%s%s", res.substring(0, commentPos), res.substring(commentEnd + 3));
67
+
68
+			commentPos = res.indexOf("<!--");
69
+		}
70
+
71
+		return res;
72
+	}
73
+
74
+	/**
75
+	 * Parses the first line of each XML which states its version and encoding
76
+	 * 
77
+	 * @param xml
78
+	 * @param prologue
79
+	 * @return position where the encoding finishes
80
+	 */
81
+	private static int parsePrologue(String xml, XmlPrologue prologue) {
82
+		prologue.version = "1.0";
83
+		prologue.encoding = "UTF-8";
84
+
85
+		int prologueStart = xml.indexOf("<?xml");
86
+		int prologueEnd = xml.indexOf("?>", prologueStart);
87
+
88
+		if (prologueStart != -1 && prologueEnd != -1) {
89
+			String prologueString = xml.substring(prologueStart + 5, prologueEnd);
90
+
91
+			HashMap<String, String> prologueAttrs = new HashMap<String, String>();
92
+			parseAttributeList(prologueString, 0, prologueAttrs);
93
+
94
+			if (prologueAttrs.containsKey("version")) {
95
+				prologue.version = prologueAttrs.get("version");
96
+			}
97
+
98
+			if (prologueAttrs.containsKey("encoding")) {
99
+				prologue.encoding = prologueAttrs.get("encoding");
100
+			}
101
+		}
102
+
103
+		return prologueEnd != -1 ? prologueEnd + "?>".length() : 0;
104
+	}
105
+
106
+	/**
107
+	 * Parse a tag and its children
108
+	 * 
109
+	 * @param xml
110
+	 * @param pointer
111
+	 *            position from where to start parsing
112
+	 * @param tag
113
+	 *            output parameter where the tag information will be put
114
+	 * @return position where it has stopped parsing
115
+	 * @throws XmlParseException
116
+	 */
117
+	private static int parseTag(String xml, int pointer, XmlTag tag) throws XmlParseException {
118
+		int res;
119
+
120
+		// Parsing the name and attributes of the tag
121
+		int headerEnd = parseTagHeader(xml, pointer, tag);
122
+
123
+		// If the tag wasn't an empty tag (finishes right away with a '/>') look
124
+		// for children content
125
+		if (!tag.empty) {
126
+			// First looking for tag content which is not a children XML tag
127
+			int childrenPos = parseTagContent(xml, headerEnd, tag);
128
+
129
+			while (hasChildren(xml, childrenPos, tag.name)) {
130
+				XmlTag child = new XmlTag();
131
+				childrenPos = parseTag(xml, childrenPos, child);
132
+
133
+				tag.children.add(child);
134
+
135
+				// As far as I know there could be child tags and content mixed,
136
+				// TODO: I am not planning on using this, so I'll just append
137
+				// all the content in one string
138
+				// but I'm not preserving the order between tags and chunks of
139
+				// content
140
+				childrenPos = parseTagContent(xml, childrenPos, tag);
141
+			}
142
+
143
+			res = parseEndTag(xml, childrenPos, tag.name);
144
+		} else {
145
+			res = headerEnd;
146
+		}
147
+
148
+		return res;
149
+	}
150
+
151
+	/**
152
+	 * All the different states the tag header parser can be in.
153
+	 */
154
+	private enum TagHeaderStates {
155
+		Init, TagStart, Name, AttrList, EmptyTagEnd, End, Invalid
156
+	}
157
+
158
+	/**
159
+	 * Enumeration with all the possible events we can receive when parsing a
160
+	 * tag header.
161
+	 */
162
+	private enum TagHeaderActions {
163
+		Space, TagInit, NameChar, Slash, TagEnd, Invalid
164
+	}
165
+
166
+	/**
167
+	 * Structure that describes the tag header parser state machine. There is an
168
+	 * array entry for each header parser state, and for each of these there's
169
+	 * an array with an entry for each tag header parser action that describes
170
+	 * to which state should go when receiving that action. For instance in the
171
+	 * tag header name state if we receive another character we stay in the tag
172
+	 * header name state, but if we receive a ' ' we go to the attribute list
173
+	 * state.
174
+	 */
175
+	private static final TagHeaderStates[][] TagHeaderStateMachine = new TagHeaderStates[][] {
176
+			// Init state
177
+			{ TagHeaderStates.Init, TagHeaderStates.TagStart, TagHeaderStates.Invalid, TagHeaderStates.Invalid,
178
+					TagHeaderStates.Invalid, TagHeaderStates.Invalid },
179
+			// Tag start state '<'
180
+			{ TagHeaderStates.TagStart, TagHeaderStates.Invalid, TagHeaderStates.Name, TagHeaderStates.Invalid,
181
+					TagHeaderStates.Invalid, TagHeaderStates.Invalid },
182
+			// Tag name state '<' + ' tagname '
183
+			{ TagHeaderStates.AttrList, TagHeaderStates.Invalid, TagHeaderStates.Name, TagHeaderStates.EmptyTagEnd,
184
+					TagHeaderStates.End, TagHeaderStates.Invalid },
185
+			// Attribute list state 'key='val' key2='val'' (this will be
186
+			// processed in its own state machine)
187
+			{ TagHeaderStates.AttrList, TagHeaderStates.Invalid, TagHeaderStates.Invalid, TagHeaderStates.EmptyTagEnd,
188
+					TagHeaderStates.End, TagHeaderStates.Invalid },
189
+			// Empty tag end state, '/' + '>'
190
+			{ TagHeaderStates.Invalid, TagHeaderStates.Invalid, TagHeaderStates.Invalid, TagHeaderStates.Invalid,
191
+					TagHeaderStates.End, TagHeaderStates.Invalid }, };
192
+
193
+	/**
194
+	 * Tag header parser data: the positions where the tag name starts and ends,
195
+	 * the attributes found in the header and whether the tag is an empty tag
196
+	 */
197
+	private static class TagHeaderParserData {
198
+		public int nameStart = 0;
199
+		public int nameEnd = 0;
200
+		public HashMap<String, String> attributes = new HashMap<String, String>();
201
+		public boolean empty = false;
202
+	}
203
+
204
+	/**
205
+	 * Parse the tag name and attribute list
206
+	 * 
207
+	 * @param xml
208
+	 * @param pointer
209
+	 * @param tag
210
+	 * @return position from where to continue parsing
211
+	 * @throws XmlParseException
212
+	 */
213
+	private static int parseTagHeader(String xml, int pointer, XmlTag tag) throws XmlParseException {
214
+		TagHeaderStates state = TagHeaderStates.Init;
215
+		TagHeaderParserData parserData = new TagHeaderParserData();
216
+
217
+		int i = pointer;
218
+		boolean done = i >= xml.length();
219
+		while (!done) {
220
+			// From the current character determine its corresponding action in
221
+			// the state machine
222
+			char nextChar = xml.charAt(i);
223
+			TagHeaderActions action = parseCharIntoTagHeaderAction(nextChar);
224
+
225
+			// Apply the action to the current state of the state machine and
226
+			// obtain its resulting new state
227
+			TagHeaderStates newState = TagHeaderStateMachine[state.ordinal()][action.ordinal()];
228
+
229
+			// Process this state transition
230
+			if (state != newState) // In this parser interesting stuff only
231
+									// happens when we change state
232
+			{
233
+				i = processTagHeaderStateTransition(xml, i, state, newState, parserData);
234
+			} else {
235
+				i++;
236
+			}
237
+
238
+			state = newState;
239
+
240
+			done = i >= xml.length() || state == TagHeaderStates.End || state == TagHeaderStates.Invalid;
241
+		}
242
+
243
+		// If the tag header parsing was successful store the name and
244
+		// attributes in the XML tag object
245
+		if (state == TagHeaderStates.End) {
246
+			String name = xml.substring(parserData.nameStart, parserData.nameEnd);
247
+
248
+			tag.name = name;
249
+			tag.attributes = parserData.attributes;
250
+			tag.empty = parserData.empty;
251
+		} else {
252
+			throw new XmlParseException("Error parsing tag header", i);
253
+		}
254
+
255
+		return i;
256
+	}
257
+
258
+	/**
259
+	 * Convert a char to its corresponding TagHeaderAction for the tag header
260
+	 * parser state machine
261
+	 * 
262
+	 * @param c
263
+	 *            Character to parse
264
+	 * @return The corresponding TagHeaderAction for the input character
265
+	 */
266
+	private static TagHeaderActions parseCharIntoTagHeaderAction(char c) {
267
+		// By default we mark it as a valid value character
268
+		TagHeaderActions res = TagHeaderActions.NameChar;
269
+
270
+		// Checking if it's some form of whitespace
271
+		if (Character.isWhitespace(c)) {
272
+			res = TagHeaderActions.Space;
273
+		} else if (c == '<') {
274
+			res = TagHeaderActions.TagInit;
275
+		} else if (c == '>') {
276
+			res = TagHeaderActions.TagEnd;
277
+		} else if (c == '/') {
278
+			res = TagHeaderActions.Slash;
279
+		} else if (c == '\'' || c == '"') {
280
+			res = TagHeaderActions.Invalid;
281
+		}
282
+
283
+		return res;
284
+	}
285
+
286
+	/**
287
+	 * Process a state transition
288
+	 * 
289
+	 * @param pos
290
+	 *            Current parsing position
291
+	 * @param from
292
+	 *            Old state
293
+	 * @param to
294
+	 *            New state
295
+	 * @param parserData
296
+	 *            Here it will be stored name and value positions as they are
297
+	 *            found
298
+	 * @return the position from where to continue parsing
299
+	 */
300
+	private static int processTagHeaderStateTransition(String xml, int pos, TagHeaderStates from, TagHeaderStates to,
301
+			TagHeaderParserData parserData) {
302
+		// By default we continue parsing from the next character
303
+		int res = pos + 1;
304
+
305
+		// Transition from a non-name state to a name state, we store the
306
+		// initial position of the name
307
+		if (from != TagHeaderStates.Name && to == TagHeaderStates.Name) {
308
+			parserData.nameStart = pos;
309
+		}
310
+		// Transition from a name state to a non-name state, we store the final
311
+		// position of the name
312
+		else if (from == TagHeaderStates.Name && to != TagHeaderStates.Name) {
313
+			parserData.nameEnd = pos;
314
+		}
315
+
316
+		// Parse the attribute list, it has its own parser, it will return the
317
+		// position from where to continue parsing
318
+		if (from != TagHeaderStates.AttrList && to == TagHeaderStates.AttrList) {
319
+			res = parseAttributeList(xml, pos, parserData.attributes);
320
+		}
321
+
322
+		// If we find a '/' it means this tag has no body
323
+		if (to == TagHeaderStates.EmptyTagEnd) {
324
+			parserData.empty = true;
325
+		}
326
+
327
+		return res;
328
+	}
329
+
330
+/**
331
+	 * Checks if the next tag is an ending tag with the name of the parent
332
+	 * @param xml
333
+	 * @param pointer position starting with a '<' in xml
334
+	 * @param parentName name of the parent tag to check if it has children
335
+	 * @return if there are tags before the ending tag of the parent
336
+	 */
337
+	private static boolean hasChildren(String xml, int pointer, String parentName) {
338
+		boolean res = false;
339
+
340
+		try {
341
+			parseEndTag(xml, pointer, parentName);
342
+		} catch (XmlParseException e) {
343
+			res = true;
344
+		}
345
+
346
+		return res;
347
+	}
348
+
349
+	/**
350
+	 * Check this tag is the end tag for a specific parent tag
351
+	 * 
352
+	 * @param xml
353
+	 * @param pointer
354
+	 *            points to a tag that should be the end tag for tagName
355
+	 * @param tagName
356
+	 * @return position from where to continue parsing
357
+	 */
358
+	private static int parseEndTag(String xml, int pointer, String tagName) throws XmlParseException {
359
+		int res;
360
+
361
+		boolean correct = xml.startsWith("</", pointer);
362
+
363
+		// Getting everything between the initial '</' and a '>'
364
+		int endPos = xml.indexOf(">", pointer);
365
+		if (correct) {
366
+			correct = correct && endPos != -1;
367
+		}
368
+
369
+		if (correct) {
370
+			String potentialParentEndTag = xml.substring(pointer + "</".length(), endPos);
371
+
372
+			// Trimming any spaces before and after the string we have generated
373
+			potentialParentEndTag = potentialParentEndTag.trim();
374
+
375
+			// Here we should have the name of the parent tag
376
+			correct = tagName.equals(potentialParentEndTag);
377
+		}
378
+
379
+		if (correct) {
380
+			res = endPos + 1;
381
+		} else {
382
+			throw new XmlParseException(String.format("Expecting end tag <%s/>", tagName), pointer);
383
+		}
384
+
385
+		return res;
386
+	}
387
+
388
+	/**
389
+	 * All the different states the attribute parser can be in.
390
+	 */
391
+	private enum AttrStates {
392
+		Init, Name, PreSeparator, Separator, PostSeparator, SingleQuotedContent, DoubleQuotedContent, End, Invalid
393
+	}
394
+
395
+	/**
396
+	 * Enumeration with all the possible events we can receive when parsing an
397
+	 * attribute.
398
+	 */
399
+	private enum AttrActions {
400
+		Space, NameChar, Separator, SingleQuote, DoubleQuote, Slash, Invalid
401
+	}
402
+
403
+	/**
404
+	 * Structure that describes the attribute parser state machine. There is an
405
+	 * array entry for each attribute parser state, and for each of these
406
+	 * there's an array with an entry for each attribute parser action that
407
+	 * describes to which state should go when receiving that action. For
408
+	 * instance in the attribute name state if we receive another character we
409
+	 * stay in the attribute name state, but if we receive an '=' we go to the
410
+	 * attribute separator state.
411
+	 */
412
+	private static final AttrStates[][] AttrStateMachine = new AttrStates[][] {
413
+			// Init state
414
+			{ AttrStates.Init, AttrStates.Name, AttrStates.Invalid, AttrStates.Invalid, AttrStates.Invalid,
415
+					AttrStates.Invalid, AttrStates.Invalid },
416
+			// Attribute name state
417
+			{ AttrStates.PreSeparator, AttrStates.Name, AttrStates.Separator, AttrStates.Invalid, AttrStates.Invalid,
418
+					AttrStates.Invalid, AttrStates.Invalid },
419
+			// Attribute pre separator state
420
+			{ AttrStates.PreSeparator, AttrStates.Invalid, AttrStates.Separator, AttrStates.Invalid,
421
+					AttrStates.Invalid, AttrStates.Invalid, AttrStates.Invalid },
422
+			// Attribute separator state
423
+			{ AttrStates.PostSeparator, AttrStates.Invalid, AttrStates.Invalid, AttrStates.SingleQuotedContent,
424
+					AttrStates.DoubleQuotedContent, AttrStates.Invalid, AttrStates.Invalid },
425
+			// Attribute separator post state
426
+			{ AttrStates.PostSeparator, AttrStates.Invalid, AttrStates.Invalid, AttrStates.SingleQuotedContent,
427
+					AttrStates.DoubleQuotedContent, AttrStates.Invalid, AttrStates.Invalid },
428
+			// Single quoted content state
429
+			{ AttrStates.SingleQuotedContent, AttrStates.SingleQuotedContent, AttrStates.SingleQuotedContent,
430
+					AttrStates.End, AttrStates.SingleQuotedContent, AttrStates.SingleQuotedContent, AttrStates.Invalid },
431
+			// Double quoted content state
432
+			{ AttrStates.DoubleQuotedContent, AttrStates.DoubleQuotedContent, AttrStates.DoubleQuotedContent,
433
+					AttrStates.DoubleQuotedContent, AttrStates.End, AttrStates.DoubleQuotedContent, AttrStates.Invalid }, };
434
+
435
+	/**
436
+	 * Attribute parser data such as where is the position where the attribute
437
+	 * name starts and so forth
438
+	 */
439
+	private static class AttrParserData {
440
+		public int nameStart = 0;
441
+		public int nameEnd = 0;
442
+		public int valueStart = 0;
443
+		public int valueEnd = 0;
444
+	}
445
+
446
+	/**
447
+	 * Parse an attribute list, if it doesn't find anything or finds something
448
+	 * not belonging to an attribute list returns with the position of the
449
+	 * offending character, in the meantime it will have filled the attributes
450
+	 * hashtable argument with all the attributes it has found.
451
+	 * 
452
+	 * @param xml
453
+	 *            String to look for an attribute list
454
+	 * @param pointer
455
+	 *            Position from where to start parsing
456
+	 * @param attributes
457
+	 *            Output parameter to place all key-value entries with the
458
+	 *            attributes found
459
+	 * @return the position for the parser to continue on
460
+	 */
461
+	private static int parseAttributeList(String xml, int pointer, HashMap<String, String> attributes) {
462
+		int i = pointer;
463
+		boolean done = false;
464
+
465
+		// Keep parsing attributes until we find something that is not an XML tag
466
+		// attribute
467
+		do {
468
+			AttrStates state = AttrStates.Init;
469
+			AttrParserData parserData = new AttrParserData();
470
+			boolean attrDone = i >= xml.length() || state == AttrStates.End || state == AttrStates.Invalid;
471
+			while (!attrDone) {
472
+				// From the current character determine its corresponding action
473
+				// in the state machine
474
+				char nextChar = xml.charAt(i);
475
+				AttrActions action = parseCharIntoAttrAction(nextChar);
476
+
477
+				// Apply the action to the current state of the state machine
478
+				// and obtain its resulting new state
479
+				AttrStates newState = AttrStateMachine[state.ordinal()][action.ordinal()];
480
+
481
+				// Process this state transition
482
+				if (state != newState) // In this parser interesting stuff only
483
+										// happens when we change state
484
+				{
485
+					i = processAttrStateTransition(i, state, newState, parserData);
486
+				} else {
487
+					i++;
488
+				}
489
+
490
+				state = newState;
491
+
492
+				attrDone = i >= xml.length() || state == AttrStates.End || state == AttrStates.Invalid;
493
+			}
494
+
495
+			// If the attribute parsing was successful store it in the hash
496
+			// table
497
+			if (state == AttrStates.End) {
498
+				String name = xml.substring(parserData.nameStart, parserData.nameEnd);
499
+				String value = xml.substring(parserData.valueStart, parserData.valueEnd);
500
+
501
+				// Escaping value literal
502
+				value = unescapeXmlLiteral(value);
503
+
504
+				attributes.put(name, value);
505
+			}
506
+
507
+			done = i >= xml.length() || state == AttrStates.Invalid;
508
+		} while (!done);
509
+
510
+		return i;
511
+	}
512
+
513
+	/**
514
+	 * Convert a char to its corresponding AttrAction for the attribute parser
515
+	 * state machine
516
+	 * 
517
+	 * @param c
518
+	 *            Character to parse
519
+	 * @return The corresponding AttrAction for the input character
520
+	 */
521
+	private static AttrActions parseCharIntoAttrAction(char c) {
522
+		// By default we mark it as a valid value character
523
+		AttrActions res = AttrActions.NameChar;
524
+
525
+		// Checking if it's some form of whitespace
526
+		if (Character.isWhitespace(c)) {
527
+			res = AttrActions.Space;
528
+		}
529
+		// For this parser '<' and '>' are invalid
530
+		else if (c == '<' || c == '>') {
531
+			res = AttrActions.Invalid;
532
+		}
533
+		// '=' separates the name of the attribute and its value
534
+		else if (c == '=') {
535
+			res = AttrActions.Separator;
536
+		}
537
+		// Values can be enclosed in single and double quotes
538
+		else if (c == '\'') {
539
+			res = AttrActions.SingleQuote;
540
+		} else if (c == '"') {
541
+			res = AttrActions.DoubleQuote;
542
+		} else if (c == '/') {
543
+			res = AttrActions.Slash;
544
+		}
545
+
546
+		return res;
547
+	}
548
+
549
+	/**
550
+	 * Process a state transition
551
+	 * 
552
+	 * @param pos
553
+	 *            Current parsing position
554
+	 * @param from
555
+	 *            Old state
556
+	 * @param to
557
+	 *            New state
558
+	 * @param parserData
559
+	 *            Here it will be stored name and value positions as they are
560
+	 *            found
561
+	 */
562
+	private static int processAttrStateTransition(int pos, AttrStates from, AttrStates to, AttrParserData parserData) {
563
+		int res = pos + 1;
564
+
565
+		// Transition from a non-name state to a name state, we store the
566
+		// initial position of the name
567
+		if (from != AttrStates.Name && to == AttrStates.Name) {
568
+			parserData.nameStart = pos;
569
+		}
570
+		// Transition from a name state to a non-name state, we store the final
571
+		// position of the name
572
+		else if (from == AttrStates.Name && to != AttrStates.Name) {
573
+			parserData.nameEnd = pos;
574
+		}
575
+		// Transition from a non-value state to a value state (single or double
576
+		// quoted), store the initial value pos
577
+		else if ((from != AttrStates.SingleQuotedContent && to == AttrStates.SingleQuotedContent)
578
+				|| (from != AttrStates.DoubleQuotedContent && to == AttrStates.DoubleQuotedContent)) {
579
+			parserData.valueStart = pos + 1;
580
+		}
581
+		// Transition from a value state (single or double quoted) to a non
582
+		// value state
583
+		else if ((from == AttrStates.SingleQuotedContent && to == AttrStates.End)
584
+				|| (from == AttrStates.DoubleQuotedContent && to == AttrStates.End)) {
585
+			parserData.valueEnd = pos;
586
+		}
587
+
588
+		// When we are in the invalid state here means this is not part of the
589
+		// attribute list
590
+		if (to == AttrStates.Invalid) {
591
+			res = pos;
592
+		}
593
+
594
+		return res;
595
+	}
596
+
597
+	/**
598
+	 * All the different states the tag content parser can be in.
599
+	 */
600
+	private enum TagContentStates {
601
+		Content, Gt, CDATA, End, Invalid
602
+	}
603
+
604
+	/**
605
+	 * Enumeration with all the possible events we can receive when parsing a
606
+	 * tag's content.
607
+	 */
608
+	private enum TagContentActions {
609
+		Char, TagInit, Exclamation, Invalid
610
+	}
611
+
612
+	/**
613
+	 * Structure that describes the tag content parser state machine.
614
+	 */
615
+	private static final TagContentStates[][] TagContentStateMachine = new TagContentStates[][] {
616
+			// Content state
617
+			{ TagContentStates.Content, TagContentStates.Gt, TagContentStates.Invalid, TagContentStates.Invalid },
618
+			// GT state (a '<' has been found and we need to decide if it's a
619
+			// new tag or a CDATA)
620
+			{ TagContentStates.End, TagContentStates.Invalid, TagContentStates.CDATA, TagContentStates.Invalid },
621
+			// CDATA state (it has its own parser)
622
+			{ TagContentStates.Content, TagContentStates.Gt, TagContentStates.Invalid, TagContentStates.Invalid }, };
623
+
624
+	/**
625
+	 * Tag content parser data It contains all the data fragments it has found
626
+	 * along the way
627
+	 */
628
+	private static class TagContentParserData {
629
+		public TagContentParserData(int pos) {
630
+			lastContentStart = pos;
631
+		}
632
+
633
+		public LinkedList<String> contentBits = new LinkedList<String>();
634
+		public int lastContentStart;
635
+	}
636
+
637
+	/**
638
+	 * Parse the content of a tag
639
+	 * 
640
+	 * @param xml
641
+	 * @param pointer
642
+	 * @param tag
643
+	 * @return position from where to continue parsing
644
+	 * @throws XmlParseException
645
+	 */
646
+	private static int parseTagContent(String xml, int pointer, XmlTag tag) throws XmlParseException {
647
+		TagContentStates state = TagContentStates.Content;
648
+		TagContentParserData parserData = new TagContentParserData(pointer);
649
+
650
+		int i = pointer;
651
+		boolean done = i >= xml.length();
652
+		while (!done) {
653
+			// From the current character determine its corresponding action in
654
+			// the state machine
655
+			char nextChar = xml.charAt(i);
656
+			TagContentActions action = parseCharIntoTagContentAction(nextChar);
657
+
658
+			// Apply the action to the current state of the state machine and
659
+			// obtain its resulting new state
660
+			TagContentStates newState = TagContentStateMachine[state.ordinal()][action.ordinal()];
661
+
662
+			// Process this state transition
663
+			if (state != newState) // In this parser interesting stuff only
664
+									// happens when we change state
665
+			{
666
+				i = processTagContentStateTransition(xml, i, state, newState, parserData);
667
+			} else {
668
+				i++;
669
+			}
670
+
671
+			state = newState;
672
+
673
+			done = i >= xml.length() || state == TagContentStates.End || state == TagContentStates.Invalid;
674
+		}
675
+
676
+		// If the tag content parsing was successful combine all the string bits
677
+		// we have found into one
678
+		if (state == TagContentStates.End) {
679
+			String contentBit = parserData.contentBits.poll();
680
+			while (contentBit != null) {
681
+				// TODO: Java seems to not have an efficient way of joining all
682
+				// the strings into one using its
683
+				// standard library, for now doing it like this
684
+				tag.content += contentBit;
685
+
686
+				contentBit = parserData.contentBits.poll();
687
+			}
688
+
689
+			// We have to return a position minus two, because we have parsed a
690
+			// '<' plus something else
691
+			i -= 2;
692
+		} else {
693
+			throw new XmlParseException("Error parsing tag content", i);
694
+		}
695
+
696
+		return i;
697
+	}
698
+
699
+	/**
700
+	 * Convert a char to its corresponding TagContentAction for the tag content
701
+	 * parser state machine
702
+	 * 
703
+	 * @param c
704
+	 *            Character to parse
705
+	 * @return The corresponding TagContentAction for the input character
706
+	 */
707
+	private static TagContentActions parseCharIntoTagContentAction(char c) {
708
+		// By default we mark it as a valid value character
709
+		TagContentActions res = TagContentActions.Char;
710
+
711
+		if (c == '<') {
712
+			res = TagContentActions.TagInit;
713
+		} else if (c == '!') {
714
+			res = TagContentActions.Exclamation;
715
+		} else if (c == '>' || c == '\'' || c == '"') {
716
+			res = TagContentActions.Invalid;
717
+		}
718
+
719
+		return res;
720
+	}
721
+
722
+	/**
723
+	 * Process changing from one content tag parsing state to another
724
+	 * 
725
+	 * @param xml
726
+	 * @param pos
727
+	 * @param from
728
+	 * @param to
729
+	 * @param parserData
730
+	 * @return Position from where to continue parsing
731
+	 * @throws XmlParseException
732
+	 */
733
+	private static int processTagContentStateTransition(String xml, int pos, TagContentStates from,
734
+			TagContentStates to, TagContentParserData parserData) throws XmlParseException {
735
+		int res = pos + 1;
736
+
737
+		// Transition from a non-content state to a content state, we store the
738
+		// initial position of this content bit
739
+		if (from != TagContentStates.Content && to == TagContentStates.Content) {
740
+			parserData.lastContentStart = pos;
741
+		}
742
+		// Transition from a content state to a non-content state, get the
743
+		// substring for this content
744
+		if (from == TagContentStates.Content && to != TagContentStates.Content) {
745
+			if (pos != parserData.lastContentStart) {
746
+				String contentBit = xml.substring(parserData.lastContentStart, pos);
747
+
748
+				// Trimming initial and final spaces
749
+				contentBit = contentBit.trim();
750
+
751
+				// unescaping string bit
752
+				contentBit = unescapeXmlLiteral(contentBit);
753
+
754
+				// Removing linefeeds and tabs
755
+				contentBit = removeTabsAndLinefeeds(contentBit);
756
+
757
+				parserData.contentBits.add(contentBit);
758
+			}
759
+		}
760
+		// Transition to a CDATA state
761
+		else if (from != TagContentStates.CDATA && to == TagContentStates.CDATA) {
762
+			// We take away one from pos because pos has already passed over the
763
+			// '<!' of the '<![CDATA['
764
+			res = parseCDATA(xml, pos - 1, parserData);
765
+		}
766
+
767
+		return res;
768
+	}
769
+
770
+	/**
771
+	 * Parses a CDATA piece of content
772
+	 * 
773
+	 * @param xml
774
+	 * @param pos
775
+	 *            Position pointing at the very start of a CDATA block
776
+	 *            "<![CDATA["
777
+	 * @param parserData
778
+	 * @return position from where to continue parsing
779
+	 */
780
+	private static int parseCDATA(String xml, int pos, TagContentParserData parserData) throws XmlParseException {
781
+		boolean correct = xml.startsWith("<![CDATA[", pos);
782
+		int res = pos;
783
+
784
+		if (correct) {
785
+			int cdataEnd = xml.indexOf("]]>", pos);
786
+
787
+			correct = cdataEnd != -1;
788
+			// We have a correct CDATA block
789
+			if (correct) {
790
+				String contentBit = xml.substring(pos + "<![CDATA[".length(), cdataEnd);
791
+
792
+				parserData.contentBits.add(contentBit);
793
+
794
+				res = cdataEnd + "]]>".length();
795
+			}
796
+		}
797
+
798
+		if (!correct) {
799
+			throw new XmlParseException("Error parsing CDATA block", pos);
800
+		}
801
+
802
+		return res;
803
+	}
804
+
805
+	private static class StringPair {
806
+		public StringPair(String first, String second) {
807
+			this.first = first;
808
+			this.second = second;
809
+		}
810
+
811
+		public String first;
812
+		public String second;
813
+	}
814
+
815
+	private static final StringPair[] EscapedEntities = new StringPair[] { new StringPair("&lt", "<"),
816
+			new StringPair("&gt", ">"), new StringPair("&amp", "&"), new StringPair("&apos", "'"),
817
+			new StringPair("&quot", "\""), };
818
+
819
+	/**
820
+	 * Unescape XML literal, that is &lt, &gt, &amp, &apos, &quot
821
+	 * 
822
+	 * @param literal
823
+	 * @return
824
+	 */
825
+	public static String unescapeXmlLiteral(String literal) {
826
+		String res = literal;
827
+
828
+		// Look for any escaped entities
829
+		for (int i = 0; i < EscapedEntities.length; i++) {
830
+			// For each entity replace all of its occurrences
831
+			res = Utils.ReplaceStr(res, EscapedEntities[i].first, EscapedEntities[i].second);
832
+		}
833
+
834
+		return res;
835
+	}
836
+
837
+	/**
838
+	 * Escape XML literal, that is &lt, &gt, &amp, &apos, &quot
839
+	 * 
840
+	 * @param literal
841
+	 * @param skip
842
+	 *            List of tokens to skip escaping
843
+	 * @return escaped literal
844
+	 */
845
+	public static String escapeXmlLiteral(String literal, String[] skip) {
846
+		String res = literal;
847
+		TreeSet<String> skipSet = new TreeSet<String>();
848
+		if (skip != null) {
849
+			for (int i = 0; i < skip.length; i++) {
850
+				skipSet.add(skip[i]);
851
+			}
852
+		}
853
+
854
+		// Look for any escaped entities
855
+		for (int i = 0; i < EscapedEntities.length; i++) {
856
+			if (!skipSet.contains(EscapedEntities[i].second)) {
857
+				// For each entity replace all of its occurrences
858
+				res = Utils.ReplaceStr(res, EscapedEntities[i].second, EscapedEntities[i].first);
859
+			}
860
+		}
861
+
862
+		return res;
863
+	}
864
+
865
+	private static String removeTabsAndLinefeeds(String literal) {
866
+		String res = literal;
867
+
868
+		res = Utils.ReplaceStr(res, "\n", "");
869
+		res = Utils.ReplaceStr(res, "\t", "");
870
+		res = Utils.ReplaceStr(res, "\r", "");
871
+
872
+		return res;
873
+	}
874
+
875
+}
0 876
new file mode 100644
... ...
@@ -0,0 +1,39 @@
1
+/**
2
+ * Copyright Antoni Silvestre
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5
+ * use this file except in compliance with the License. You may obtain a copy of
6
+ * the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13
+ * License for the specific language governing permissions and limitations under
14
+ * the License.
15
+ */
16
+
17
+package com.github.asilvestre.jpurexml;
18
+
19
/**
 * XML prologue description, that is the data of the
 * <?xml version="..." encoding="..."?> declaration of a document.
 */
public class XmlPrologue {
	/**
	 * XML version
	 */
	public String version;

	/**
	 * Character encoding
	 */
	public String encoding;

	/**
	 * Render the prologue as an XML declaration. A field that has not been set
	 * is printed as "null".
	 * <p>
	 * Plain concatenation is used instead of String.format because the latter
	 * is not part of GWT's JRE emulation.
	 */
	@Override
	public String toString() {
		return "<?xml version=\"" + version + "\" encoding=\"" + encoding + "\"?>";
	}
}
0 40
new file mode 100644
... ...
@@ -0,0 +1,132 @@
1
+/**
2
+ * Copyright Antoni Silvestre
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5
+ * use this file except in compliance with the License. You may obtain a copy of
6
+ * the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13
+ * License for the specific language governing permissions and limitations under
14
+ * the License.
15
+ */
16
+
17
+package com.github.asilvestre.jpurexml;
18
+
19
+import java.util.AbstractMap;
20
+import java.util.AbstractSequentialList;
21
+import java.util.HashMap;
22
+import java.util.Iterator;
23
+import java.util.LinkedList;
24
+import java.util.TreeSet;
25
+
26
+/**
27
+ * Class that represents an XML tag with its children, content and attributes.
28
+ * It doesn't have getter or setter methods for its properties as it is only
29
+ * intended to be a simple data container.
30
+ */
31
+public class XmlTag {
32
+	/**
33
+	 * Name for this tag
34
+	 */
35
+	public String name = "";
36
+
37
+	/**
38
+	 * Dictionary with all the attributes for this tag.
39
+	 */
40
+	public AbstractMap<String, String> attributes = new HashMap<String, String>();
41
+
42
+	/**
43
+	 * Ordered list with all the tag children for this tag.
44
+	 */
45
+	public AbstractSequentialList<XmlTag> children = new LinkedList<XmlTag>();
46
+
47
+	/**
48
+	 * Tag contents which are not child tags, i.e. <tag>content</tag>. If a tag
49
+	 * has content and child tags mixed all the content will be aggregated here
50
+	 * in one unit.
51
+	 */
52
+	public String content = "";
53
+
54
+	/**
55
+	 * If this tag is singleton. That is, it doesn't have a closing tag, for
56
+	 * instance <emptytag />
57
+	 */
58
+	public boolean empty = false;
59
+
60
+	@Override
61
+	public String toString() {
62
+		String res;
63
+
64
+		// Printing tag header (name and attributes)
65
+		String attrStr = "";
66
+		Iterator<String> keyIter = attributes.keySet().iterator();
67
+
68
+		// Sorting attributes (so its easier to writes tests)
69
+		TreeSet<String> treeSet = new TreeSet<String>();
70
+		while (keyIter.hasNext()) {
71
+			treeSet.add(keyIter.next());
72
+		}
73
+
74
+		// Rendering the attributes
75
+		Iterator<String> sortedKeyIter = treeSet.iterator();
76
+		while (sortedKeyIter.hasNext()) {
77
+			String key = XmlParser.escapeXmlLiteral(sortedKeyIter.next(), null);
78
+			String value = XmlParser.escapeXmlLiteral(attributes.get(key), new String[] { "\"", "'" });
79
+
80
+			boolean valueHasDoubleQuotes = value.indexOf("\"") != -1;
81
+			String attrFormat = valueHasDoubleQuotes ? "%s='%s' " : "%s=\"%s\" ";
82
+			attrStr += String.format(attrFormat, key, value);
83
+		}
84
+
85
+		String headerFormat = empty ? "<%s %s/>" : "<%s %s>";
86
+		res = String.format(headerFormat, XmlParser.escapeXmlLiteral(name, null), attrStr);
87
+
88
+		// if the header is not empty print its children and ending tag
89
+		if (!empty) {
90
+			String childrenStr = "";
91
+
92
+			Iterator<XmlTag> childIter = children.iterator();
93
+
94
+			while (childIter.hasNext()) {
95
+				XmlTag child = childIter.next();
96
+
97
+				childrenStr += child.toString();
98
+			}
99
+
100
+			// Checking if the content has any char that needs to be inside a
101
+			// CDATA block
102
+			String procContent = contentHasSpecialChars() ? String.format("<![CDATA[%s]]>", content) : content;
103
+
104
+			// Adding the ending tag
105
+			res = String.format("%s%s%s</%s>", res, childrenStr, procContent, XmlParser.escapeXmlLiteral(name, null));
106
+		}
107
+
108
+		return res;
109
+	}
110
+
111
+	/**
112
+	 * This is the list of special characters, this list might not be exhausted,
113
+	 * but my use of XML printing is mainly for testing purposes.
114
+	 */
115
+	private static char[] SpecialChars = new char[] { '\n', '\r', '\t', '<', '>', '&', '\'', '"', };
116
+
117
+	/**
118
+	 * @return The content of this tag has special characters and should be
119
+	 *         printed enclosed in a CDATA block
120
+	 */
121
+	private boolean contentHasSpecialChars() {
122
+		boolean res = false;
123
+		for (int i = 0; i < SpecialChars.length && !res; i++) {
124
+			res = content.indexOf(SpecialChars[i]) != -1;
125
+		}
126
+
127
+		// Check if it has initial or final spaces
128
+		res = res || !content.trim().equals(content);
129
+
130
+		return res;
131
+	}
132
+}
0 133
new file mode 100644
... ...
@@ -0,0 +1,60 @@
1
+/**
2
+ * Copyright Antoni Silvestre
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5
+ * use this file except in compliance with the License. You may obtain a copy of
6
+ * the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13
+ * License for the specific language governing permissions and limitations under
14
+ * the License.
15
+ */
16
+
17
+package com.github.asilvestre.jpurexml;
18
+
19
+import junit.framework.TestCase;
20
+
21
+/**
22
+ * Utilities unit tests
23
+ */
24
+public class UtilsTest extends TestCase {
25
+
26
+	/**
27
+	 * Test replacing all occurrences of a single character.
28
+	 */
29
+	public void testReplaceStr() {
30
+		String toReplace = "b";
31
+		String replacement = "c";
32
+
33
+		String[] inputs = new String[] { "aaaa", "bbbb", "aaabbaa" };
34
+
35
+		String[] outputs = new String[] { "aaaa", "cccc", "aaaccaa" };
36
+
37
+		for (int i = 0; i < inputs.length; i++) {
38
+			String res = Utils.ReplaceStr(inputs[i], toReplace, replacement);
39
+			assertEquals(outputs[i], res);
40
+		}
41
+	}
42
+
43
+	/**
44
+	 * Test replacing all occurrences of a string longer than one character.
45
+	 */
46
+	public void testReplaceStrMulti() {
47
+		String toReplace = "bb";
48
+		String replacement = "c";
49
+
50
+		String[] inputs = new String[] { "aaaa", "bbbb", "aaabbaa" };
51
+
52
+		String[] outputs = new String[] { "aaaa", "cc", "aaacaa" };
53
+
54
+		for (int i = 0; i < inputs.length; i++) {
55
+			String res = Utils.ReplaceStr(inputs[i], toReplace, replacement);
56
+			assertEquals(outputs[i], res);
57
+		}
58
+	}
59
+
60
+}
0 61
new file mode 100644
... ...
@@ -0,0 +1,241 @@
1
+/**
2
+ * Copyright Antoni Silvestre
3
+ *
4
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5
+ * use this file except in compliance with the License. You may obtain a copy of
6
+ * the License at
7
+ *
8
+ * http://www.apache.org/licenses/LICENSE-2.0
9
+ *
10
+ * Unless required by applicable law or agreed to in writing, software
11
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13
+ * License for the specific language governing permissions and limitations under
14
+ * the License.
15
+ */
16
+
17
+package com.github.asilvestre.jpurexml;
18
+
19
+import junit.framework.TestCase;
20
+
21
+/**
22
+ * Parses the main XML parsing functionality
23
+ */
24
+public class XmlParserTest extends TestCase {
25
+
26
+	/**
27
+	 * Test we parse the prologue correctly
28
+	 */
29
+	public void testParsePrologue() {
30
+		String[] inputs = new String[] { "<?xml version=\"1.0\" encoding=\"UTF-8\"?><root/>",
31
+				"<?xml version='1.0' encoding=\"UTF-8\"?><root/>",
32
+				"<?xml version='2.0' encoding=\"UTF-8\"?><   root     />",
33
+				"<?xml encoding = \"UTF-8\"     version='1.0' ?><root/>",
34
+				"<?xml a='r'   encoding = \"UTF-8\"     version='1.0' ?><root/>", };
35
+
36
+		String[] outputs = new String[] { "<?xml version=\"1.0\" encoding=\"UTF-8\"?><root />",
37
+				"<?xml version=\"1.0\" encoding=\"UTF-8\"?><root />",
38
+				"<?xml version=\"2.0\" encoding=\"UTF-8\"?><root />",
39
+				"<?xml version=\"1.0\" encoding=\"UTF-8\"?><root />",
40
+				"<?xml version=\"1.0\" encoding=\"UTF-8\"?><root />", };