|
| 1 | +package com.avast.server.hdfsshell.utils; |
| 2 | + |
| 3 | +/*BEGIN_COPYRIGHT_BLOCK |
| 4 | + * |
| 5 | + * Copyright (c) 2001-2010, JavaPLT group at Rice University (drjava@rice.edu) |
| 6 | + * All rights reserved. |
| 7 | + * |
| 8 | + * Redistribution and use in source and binary forms, with or without |
| 9 | + * modification, are permitted provided that the following conditions are met: |
| 10 | + * * Redistributions of source code must retain the above copyright |
| 11 | + * notice, this list of conditions and the following disclaimer. |
| 12 | + * * Redistributions in binary form must reproduce the above copyright |
| 13 | + * notice, this list of conditions and the following disclaimer in the |
| 14 | + * documentation and/or other materials provided with the distribution. |
| 15 | + * * Neither the names of DrJava, the JavaPLT group, Rice University, nor the |
| 16 | + * names of its contributors may be used to endorse or promote products |
| 17 | + * derived from this software without specific prior written permission. |
| 18 | + * |
| 19 | + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 20 | + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 21 | + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 22 | + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR |
| 23 | + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 24 | + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 25 | + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 26 | + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| 27 | + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| 28 | + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| 29 | + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 30 | + * |
| 31 | + * This software is Open Source Initiative approved Open Source Software. |
| 32 | + * Open Source Initative Approved is a trademark of the Open Source Initiative. |
| 33 | + * |
| 34 | + * This file is part of DrJava. Download the current version of this project |
| 35 | + * from http://www.drjava.org/ or http://sourceforge.net/projects/drjava/ |
| 36 | + * |
| 37 | + * END_COPYRIGHT_BLOCK*/ |
| 38 | + |
| 39 | +import java.util.LinkedList; |
| 40 | +import java.util.List; |
| 41 | + |
/**
 * Utility class which can tokenize a String into a list of String arguments,
 * with behavior similar to parsing command line arguments to a program.
 * Quoted Strings are treated as single arguments, and escaped characters
 * are translated so that the tokenized arguments have the same meaning.
 *
 * <p>Tokenizing rules:
 * <ul>
 *   <li>Unquoted whitespace separates arguments.</li>
 *   <li>Outside quotes, a backslash makes the next character literal.</li>
 *   <li>Inside single quotes every character up to the closing quote is literal.</li>
 *   <li>Inside double quotes a backslash only escapes {@code "} and {@code \};
 *       before any other character the backslash itself is kept.</li>
 *   <li>An unterminated quote is closed implicitly at the end of the input.</li>
 * </ul>
 *
 * Since all methods are static, the class is declared abstract to prevent
 * instantiation.
 * @version $Id$
 */
public abstract class ArgumentTokenizer {
  /** Between arguments: no characters of a new argument have been seen yet. */
  private static final int NO_TOKEN_STATE = 0;
  /** Inside an argument, outside of any quotes. */
  private static final int NORMAL_TOKEN_STATE = 1;
  /** Inside a single-quoted section of an argument. */
  private static final int SINGLE_QUOTE_STATE = 2;
  /** Inside a double-quoted section of an argument. */
  private static final int DOUBLE_QUOTE_STATE = 3;

  /** Tokenizes the given String into String tokens, without re-quoting them.
   * Equivalent to {@code tokenize(arguments, false)}.
   * @param arguments A String containing zero or more command-line style arguments to be tokenized.
   * @return A list of parsed arguments with quotes and escapes resolved.
   * @throws NullPointerException if {@code arguments} is null
   */
  public static List<String> tokenize(String arguments) {
    return tokenize(arguments, false);
  }

  /** Tokenizes the given String into String tokens.
   * @param arguments A String containing zero or more command-line style arguments to be tokenized.
   * @param stringify if true, each returned argument is wrapped in double quotes, with quotes,
   *                  backslashes and the control characters \n, \t, \r, \b, \f inside it
   *                  backslash-escaped; if false, the arguments are returned as parsed.
   * @return A list of parsed (and, if requested, re-quoted) arguments.
   * @throws NullPointerException if {@code arguments} is null
   */
  public static List<String> tokenize(String arguments, boolean stringify) {
    LinkedList<String> argList = new LinkedList<String>();
    StringBuilder currArg = new StringBuilder();
    boolean escaped = false;
    int state = NO_TOKEN_STATE;
    int len = arguments.length();

    for (int i = 0; i < len; i++) {
      char c = arguments.charAt(i);

      if (escaped) {
        // Previous character was an unquoted backslash: take this one literally.
        escaped = false;
        currArg.append(c);
        continue;
      }

      switch (state) {
        case SINGLE_QUOTE_STATE:
          if (c == '\'') {
            // Close quote seen; the argument continues until unquoted whitespace.
            state = NORMAL_TOKEN_STATE;
          }
          else {
            currArg.append(c);
          }
          break;

        case DOUBLE_QUOTE_STATE:
          if (c == '"') {
            // Close quote seen; the argument continues until unquoted whitespace.
            state = NORMAL_TOKEN_STATE;
          }
          else if (c == '\\') {
            if (i + 1 < len) {
              // Look ahead, and only escape quotes or backslashes.
              char next = arguments.charAt(++i);
              if (next != '"' && next != '\\') {
                currArg.append(c);
              }
              currArg.append(next);
            }
            else {
              // Backslash is the very last character of an unterminated quote:
              // there is nothing to look ahead to, so keep it literally instead
              // of reading past the end of the input.
              currArg.append(c);
            }
          }
          else {
            currArg.append(c);
          }
          break;

        case NO_TOKEN_STATE:
        case NORMAL_TOKEN_STATE:
          switch (c) {
            case '\\':
              escaped = true;
              state = NORMAL_TOKEN_STATE;
              break;
            case '\'':
              state = SINGLE_QUOTE_STATE;
              break;
            case '"':
              state = DOUBLE_QUOTE_STATE;
              break;
            default:
              if (!Character.isWhitespace(c)) {
                currArg.append(c);
                state = NORMAL_TOKEN_STATE;
              }
              else if (state == NORMAL_TOKEN_STATE) {
                // Whitespace ends the current argument; start a new one.
                argList.add(currArg.toString());
                currArg = new StringBuilder();
                state = NO_TOKEN_STATE;
              }
              // Whitespace between arguments is skipped.
          }
          break;

        default:
          throw new IllegalStateException("ArgumentTokenizer state " + state + " is invalid!");
      }
    }

    if (escaped) {
      // Input ended right after an unquoted backslash: keep the backslash.
      currArg.append('\\');
      argList.add(currArg.toString());
    }
    else if (state != NO_TOKEN_STATE) {
      // Close the last argument (this also closes an unterminated quote).
      argList.add(currArg.toString());
    }

    if (!stringify) {
      return argList;
    }

    // Re-quote every argument in a single pass; indexed get/set on a
    // LinkedList would make this quadratic in the number of arguments.
    LinkedList<String> quoted = new LinkedList<String>();
    for (String arg : argList) {
      quoted.add("\"" + _escapeQuotesAndBackslashes(arg) + "\"");
    }
    return quoted;
  }

  /** Inserts backslashes before any occurrences of a backslash or
   * double quote in the given string, and replaces newline, tab, carriage
   * return, backspace and form feed characters with their two-character
   * escape sequences (\n, \t, \r, \b, \f). All other characters are copied
   * unchanged.
   * @param s the string to escape
   * @return the escaped copy of {@code s}
   */
  protected static String _escapeQuotesAndBackslashes(String s) {
    final StringBuilder buf = new StringBuilder(s.length() + 8);
    for (int i = 0; i < s.length(); i++) {
      char c = s.charAt(i);
      switch (c) {
        case '\\':
        case '"':
          buf.append('\\').append(c);
          break;
        case '\n':
          buf.append("\\n");
          break;
        case '\t':
          buf.append("\\t");
          break;
        case '\r':
          buf.append("\\r");
          break;
        case '\b':
          buf.append("\\b");
          break;
        case '\f':
          buf.append("\\f");
          break;
        default:
          buf.append(c);
      }
    }
    return buf.toString();
  }
}
0 commit comments