001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.fileupload;
018
019import java.io.UnsupportedEncodingException;
020import java.util.HashMap;
021import java.util.Locale;
022import java.util.Map;
023
024import org.apache.commons.fileupload.util.mime.MimeUtility;
025
026/**
027 * A simple parser intended to parse sequences of name/value pairs.
028 *
029 * Parameter values are expected to be enclosed in quotes if they
030 * contain unsafe characters, such as '=' characters or separators.
031 * Parameter values are optional and can be omitted.
032 *
033 * <p>
034 *  {@code param1 = value; param2 = "anything goes; really"; param3}
035 * </p>
036 */
037public class ParameterParser {
038
039    /**
040     * String to be parsed.
041     */
042    private char[] chars;
043
044    /**
045     * Current position in the string.
046     */
047    private int pos;
048
049    /**
050     * Maximum position in the string.
051     */
052    private int len;
053
054    /**
055     * Start of a token.
056     */
057    private int i1;
058
059    /**
060     * End of a token.
061     */
062    private int i2;
063
064    /**
065     * Whether names stored in the map should be converted to lower case.
066     */
067    private boolean lowerCaseNames;
068
069    /**
070     * Default ParameterParser constructor.
071     */
072    public ParameterParser() {
073    }
074
075    /**
076     * A helper method to process the parsed token. This method removes
077     * leading and trailing blanks as well as enclosing quotation marks,
078     * when necessary.
079     *
080     * @param quoted {@code true} if quotation marks are expected,
081     *               {@code false} otherwise.
082     * @return the token
083     */
084    private String getToken(final boolean quoted) {
085        // Trim leading white spaces
086        while (i1 < i2 && Character.isWhitespace(chars[i1])) {
087            i1++;
088        }
089        // Trim trailing white spaces
090        while (i2 > i1 && Character.isWhitespace(chars[i2 - 1])) {
091            i2--;
092        }
093        // Strip away quotation marks if necessary
094        if (quoted
095            && i2 - i1 >= 2
096            && chars[i1] == '"'
097            && chars[i2 - 1] == '"') {
098            i1++;
099            i2--;
100        }
101        String result = null;
102        if (i2 > i1) {
103            result = new String(chars, i1, i2 - i1);
104        }
105        return result;
106    }
107
108    /**
109     * Are there any characters left to parse?
110     *
111     * @return {@code true} if there are unparsed characters,
112     *         {@code false} otherwise.
113     */
114    private boolean hasChar() {
115        return pos < len;
116    }
117
118    /**
119     * Returns {@code true} if parameter names are to be converted to lower
120     * case when name/value pairs are parsed.
121     *
122     * @return {@code true} if parameter names are to be
123     * converted to lower case when name/value pairs are parsed.
124     * Otherwise returns {@code false}
125     */
126    public boolean isLowerCaseNames() {
127        return lowerCaseNames;
128    }
129
130    /**
131     * Tests if the given character is present in the array of characters.
132     *
133     * @param ch the character to test for presence in the array of characters
134     * @param charray the array of characters to test against
135     * @return {@code true} if the character is present in the array of
136     *   characters, {@code false} otherwise.
137     */
138    private boolean isOneOf(final char ch, final char[] charray) {
139        boolean result = false;
140        for (final char element : charray) {
141            if (ch == element) {
142                result = true;
143                break;
144            }
145        }
146        return result;
147    }
148
149    /**
150     * Extracts a map of name/value pairs from the given array of
151     * characters. Names are expected to be unique.
152     *
153     * @param charArray the array of characters that contains a sequence of
154     * name/value pairs
155     * @param separator the name/value pairs separator
156     * @return a map of name/value pairs
157     */
158    public Map<String, String> parse(final char[] charArray, final char separator) {
159        if (charArray == null) {
160            return new HashMap<>();
161        }
162        return parse(charArray, 0, charArray.length, separator);
163    }
164
165    /**
166     * Extracts a map of name/value pairs from the given array of characters. Names are expected to be unique.
167     *
168     * @param charArray the array of characters that contains a sequence of name/value pairs
169     * @param offset    the initial offset.
170     * @param length    the length.
171     * @param separator the name/value pairs separator
172     * @return a map of name/value pairs
173     */
174    public Map<String, String> parse(final char[] charArray, final int offset, final int length, final char separator) {
175        if (charArray == null) {
176            return new HashMap<>();
177        }
178        final HashMap<String, String> params = new HashMap<>();
179        chars = charArray.clone();
180        pos = offset;
181        len = length;
182        while (hasChar()) {
183            String paramName = parseToken(new char[] { '=', separator });
184            String paramValue = null;
185            if (hasChar() && charArray[pos] == '=') {
186                pos++; // skip '='
187                paramValue = parseQuotedToken(new char[] { separator });
188                if (paramValue != null) {
189                    try {
190                        paramValue = RFC2231Utility.hasEncodedValue(paramName) ? RFC2231Utility.decodeText(paramValue) : MimeUtility.decodeText(paramValue);
191                    } catch (final UnsupportedEncodingException e) {
192                        // let's keep the original value in this case
193                    }
194                }
195            }
196            if (hasChar() && charArray[pos] == separator) {
197                pos++; // skip separator
198            }
199            if (paramName != null && !paramName.isEmpty()) {
200                paramName = RFC2231Utility.stripDelimiter(paramName);
201                if (lowerCaseNames) {
202                    paramName = paramName.toLowerCase(Locale.ROOT);
203                }
204                params.put(paramName, paramValue);
205            }
206        }
207        return params;
208    }
209
210    /**
211     * Extracts a map of name/value pairs from the given string. Names are
212     * expected to be unique.
213     *
214     * @param str the string that contains a sequence of name/value pairs
215     * @param separator the name/value pairs separator
216     * @return a map of name/value pairs
217     */
218    public Map<String, String> parse(final String str, final char separator) {
219        if (str == null) {
220            return new HashMap<>();
221        }
222        return parse(str.toCharArray(), separator);
223    }
224
225    /**
226     * Extracts a map of name/value pairs from the given string. Names are
227     * expected to be unique. Multiple separators may be specified and
228     * the earliest found in the input string is used.
229     *
230     * @param str the string that contains a sequence of name/value pairs
231     * @param separators the name/value pairs separators
232     * @return a map of name/value pairs
233     */
234    public Map<String, String> parse(final String str, final char[] separators) {
235        if (separators == null || separators.length == 0) {
236            return new HashMap<>();
237        }
238        char separator = separators[0];
239        if (str != null) {
240            int idx = str.length();
241            for (final char separator2 : separators) {
242                final int tmp = str.indexOf(separator2);
243                if (tmp != -1 && tmp < idx) {
244                    idx = tmp;
245                    separator = separator2;
246                }
247            }
248        }
249        return parse(str, separator);
250    }
251
252    /**
253     * Parses out a token until any of the given terminators
254     * is encountered outside the quotation marks.
255     *
256     * @param terminators the array of terminating characters. Any of these
257     * characters when encountered outside the quotation marks signify the end
258     * of the token
259     *
260     * @return the token
261     */
262    private String parseQuotedToken(final char[] terminators) {
263        char ch;
264        i1 = pos;
265        i2 = pos;
266        boolean quoted = false;
267        boolean charEscaped = false;
268        while (hasChar()) {
269            ch = chars[pos];
270            if (!quoted && isOneOf(ch, terminators)) {
271                break;
272            }
273            if (!charEscaped && ch == '"') {
274                quoted = !quoted;
275            }
276            charEscaped = !charEscaped && ch == '\\';
277            i2++;
278            pos++;
279
280        }
281        return getToken(true);
282    }
283
284    /**
285     * Parses out a token until any of the given terminators
286     * is encountered.
287     *
288     * @param terminators the array of terminating characters. Any of these
289     * characters when encountered signify the end of the token
290     *
291     * @return the token
292     */
293    private String parseToken(final char[] terminators) {
294        char ch;
295        i1 = pos;
296        i2 = pos;
297        while (hasChar()) {
298            ch = chars[pos];
299            if (isOneOf(ch, terminators)) {
300                break;
301            }
302            i2++;
303            pos++;
304        }
305        return getToken(false);
306    }
307
308    /**
309     * Sets the flag if parameter names are to be converted to lower case when
310     * name/value pairs are parsed.
311     *
312     * @param b {@code true} if parameter names are to be
313     * converted to lower case when name/value pairs are parsed.
314     * {@code false} otherwise.
315     */
316    public void setLowerCaseNames(final boolean b) {
317        lowerCaseNames = b;
318    }
319
320}