001/*
002 * JGrapes Event Driven Framework
003 * Copyright (C) 2016-2018 Michael N. Lipp
004 * 
005 * This program is free software; you can redistribute it and/or modify it 
006 * under the terms of the GNU Affero General Public License as published by 
007 * the Free Software Foundation; either version 3 of the License, or 
008 * (at your option) any later version.
009 * 
010 * This program is distributed in the hope that it will be useful, but 
011 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
012 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License 
013 * for more details.
014 * 
015 * You should have received a copy of the GNU Affero General Public License along 
016 * with this program; if not, see <http://www.gnu.org/licenses/>.
017 */
018
019package org.jgrapes.http;
020
021import java.net.URI;
022import java.text.ParseException;
023import java.util.ArrayList;
024import java.util.Arrays;
025import java.util.List;
026import java.util.Optional;
027import java.util.Spliterator;
028import java.util.Spliterators.AbstractSpliterator;
029import java.util.StringTokenizer;
030import java.util.function.Consumer;
031import java.util.regex.Matcher;
032import java.util.regex.Pattern;
033import java.util.stream.Collectors;
034import java.util.stream.Stream;
035import java.util.stream.StreamSupport;
036
037/**
038 * A resource pattern can be used to filter URIs. A pattern looks  
039 * similar to a URI (<code>scheme://host:port/path</code>) with the 
040 * following differences:
041 * 
042 *  * The <em>scheme</em> may be a single protocol name or a list
043 *    of protocol names separated by commas, or an asterisk, which is matched
044 *    by URIs with any scheme. The scheme part ({@code scheme://}) is optional 
045 *    in a pattern. Omitting it is equivalent to specifying an asterisk.
046 *    
047 *  * The <em>host</em> may be a host name or an IP address or an asterisk.
048 *    Unless the value is an asterisk, filtered URIs must match the value
049 *    literally.
050 *    
051 *  * The <em>port</em> may be a number or an asterisk.
052 *    Unless the value is an asterisk, filtered URIs must match it.
053 *   
054 *  * Specifying a port ({@code :port}) is optional. If omitted, 
055 *    it is equivalent to specifying an asterisk.
056 *    
057 *  * If the scheme part is omitted, the {@code host:port} part may 
058 *    completely be left out as well, which is
059 *    equivalent to specifying an asterisk for both host and port.
060 *    
061 *  * The optional path part consist of one or more path separated by commas.
062 *  
063 *    Each path consists of a sequence of names and asterisks separated
064 *    by slashes or a vertical bar ("|"). A name must be matched by the 
065 *    corresponding path element of filtered URIs, an asterisk is matched by 
066 *    any corresponding path element (which, however, must exist in the 
067 *    filtered URI). The final element in 
068 *    the path of a pattern may be two asterisks ({@code **}), which matches 
069 *    any remaining path elements in the filtered URI.
070 *    
071 *    Using a vertical bar instead of a slash separates the path in a
072 *    prefix part and the rest. The number of prefix segments is the
073 *    value returned by the match methods.
074 *    
075 *    If a path ends with a vertical bar and the URI matched with the
076 *    path does not end with a slash, a slash is appended to the URI
077 *    before matching. This causes both `/foo` and `/foo/` to match
078 *    a path `/foo|`. This is usually intended. If you do want to
079 *    treat `/foo` as a leaf, specify `/foo,/foo|` in your pattern.
080 *
081 */
082@SuppressWarnings("PMD.GodClass")
083public class ResourcePattern {
084
085    @SuppressWarnings("PMD.AvoidFieldNameMatchingTypeName")
086    private static Pattern resourcePattern = Pattern.compile(
087        "^((?<proto>[^:]+|\\*)://)?" // Optional protocol (2)
088            + "(" // Start of optional host/port part
089            + "(?<host>([^\\[/\\|][^:/\\|]*)|(\\[[^\\]]*\\]))" // Host (4)
090            + "(:(?<port>([0-9]+)|(\\*)))?" // Optional port (8)
091            + ")?" // End of optional host/port
092            + "(?<path>[/\\|].*)?"); // Finally path (11)
093
094    private final String pattern;
095    private final String protocol;
096    private final String host;
097    private final String port;
098    private final String path;
099    private final String[][] pathPatternElements;
100    private int[] prefixSegs;
101
102    /**
103     * Creates a new resource pattern.
104     * 
105     * @param pattern the pattern to be used for matching
106     * @throws ParseException if an invalid pattern is specified
107     */
108    @SuppressWarnings("PMD.AvoidInstantiatingObjectsInLoops")
109    public ResourcePattern(String pattern) throws ParseException {
110        this.pattern = pattern;
111        Matcher rpm = resourcePattern.matcher(pattern);
112        if (!rpm.matches()) {
113            throw new ParseException("Invalid pattern: " + pattern, 0);
114        }
115        protocol = rpm.group("proto");
116        host = rpm.group("host");
117        port = rpm.group("port");
118        path = rpm.group("path");
119        if (path == null) {
120            pathPatternElements = new String[0][];
121        } else {
122            String[] paths = path.split(",");
123            pathPatternElements = new String[paths.length][];
124            prefixSegs = new int[paths.length];
125            for (int i = 0; i < paths.length; i++) {
126                @SuppressWarnings("PMD.AvoidInstantiatingObjectsInLoops")
127                List<String> segs = new ArrayList<>();
128                prefixSegs[i] = 0;
129                StringTokenizer tokenizer = new StringTokenizer(
130                    paths[i], "/|", true);
131                while (tokenizer.hasMoreTokens()) {
132                    String token = tokenizer.nextToken();
133                    switch (token) {
134                    case "/":
135                        continue;
136                    case "|":
137                        prefixSegs[i] = segs.size();
138                        continue;
139                    default:
140                        segs.add(token);
141                        continue;
142                    }
143                }
144                if (paths[i].endsWith("/") || paths[i].endsWith("|")) {
145                    segs.add("");
146                }
147                pathPatternElements[i] = segs.toArray(new String[0]);
148            }
149        }
150    }
151
152    /**
153     * @return the pattern (string) that was used to create this 
154     * resource pattern.
155     */
156    public String pattern() {
157        return pattern;
158    }
159
160    /**
161     * @return the protocol value specified in the pattern or {@code null}
162     */
163    public String protocol() {
164        return protocol;
165    }
166
167    /**
168     * @return the host value specified in the pattern or {@code null}
169     */
170    public String host() {
171        return host;
172    }
173
174    /**
175     * @return the port value specified in the pattern or {@code null}
176     */
177    public String port() {
178        return port;
179    }
180
181    /**
182     * @return the path value specified in the pattern or {@code null}
183     */
184    public String path() {
185        return path;
186    }
187
188    /**
189     * Matches the given resource URI against the pattern.
190     * 
191     * @param resource the URI specifying the resource to match
192     * @return -1 if the resource does not match, else the number
193     * of prefix segments (which may be 0)
194     */
195    @SuppressWarnings({ "PMD.CyclomaticComplexity", "PMD.NPathComplexity",
196        "PMD.CollapsibleIfStatements", "PMD.DataflowAnomalyAnalysis" })
197    public int matches(URI resource) {
198        if (protocol != null && !protocol.equals("*")) {
199            if (resource.getScheme() == null) {
200                return -1;
201            }
202            if (Arrays.stream(protocol.split(","))
203                .noneMatch(proto -> proto.equals(resource.getScheme()))) {
204                return -1;
205            }
206        }
207        if (host != null && !host.equals("*")) {
208            if (resource.getHost() == null
209                || !resource.getHost().equals(host)) {
210                return -1;
211            }
212        }
213        if (port != null && !port.equals("*")) {
214            if (Integer.parseInt(port) != resource.getPort()) {
215                return -1;
216            }
217        }
218
219        String[] reqElements = PathSpliterator.stream(resource.getPath())
220            .skip(1).toArray(size -> new String[size]);
221        String[] reqElementsPlus = null; // Created lazily
222        for (int pathIdx = 0; pathIdx < pathPatternElements.length; pathIdx++) {
223            String[] pathPattern = pathPatternElements[pathIdx];
224            if (prefixSegs[pathIdx] == pathPattern.length - 1
225                && lastIsEmpty(pathPattern)) {
226                // Special case, pattern ends with vertical bar
227                if (reqElementsPlus == null) {
228                    reqElementsPlus = reqElements;
229                    if (!lastIsEmpty(reqElementsPlus)) {
230                        reqElementsPlus = Arrays.copyOf(
231                            reqElementsPlus, reqElementsPlus.length + 1);
232                        reqElementsPlus[reqElementsPlus.length - 1] = "";
233                    }
234                }
235                if (matchPath(pathPattern, reqElementsPlus)) {
236                    return prefixSegs[pathIdx];
237                }
238            } else {
239                if (matchPath(pathPattern, reqElements)) {
240                    return prefixSegs[pathIdx];
241                }
242            }
243        }
244        return -1;
245    }
246
247    /**
248     * Matches the given pattern against the given resource URI.
249     * 
250     * @param pattern the pattern to match
251     * @param resource the URI specifying the resource to match
252     * @return {@code true} if the resource URI matches
253     * @throws ParseException if an invalid pattern is specified
254     */
255    public static boolean matches(String pattern, URI resource)
256            throws ParseException {
257        return (new ResourcePattern(pattern)).matches(resource) >= 0;
258    }
259
260    @SuppressWarnings("PMD.UseVarargs")
261    private static boolean lastIsEmpty(String[] elements) {
262        return elements.length > 0
263            && elements[elements.length - 1].length() == 0;
264    }
265
266    @SuppressWarnings({ "PMD.UseVarargs", "PMD.DataflowAnomalyAnalysis",
267        "PMD.PositionLiteralsFirstInComparisons" })
268    private boolean matchPath(String[] patternElements, String[] reqElements) {
269        int pathIdx = 0;
270        int reqIdx = 0;
271        while (true) {
272            if (pathIdx == patternElements.length) {
273                return reqIdx == reqElements.length;
274            }
275            if (reqIdx == reqElements.length) {
276                return false;
277            }
278            String matchElement = patternElements[pathIdx++];
279            if ("**".equals(matchElement)) {
280                return true;
281            }
282            String reqElement = reqElements[reqIdx++];
283            if (!matchElement.equals("*") && !matchElement.equals(reqElement)) {
284                return false;
285            }
286        }
287
288    }
289
290    /**
291     * Removes the given number of segments (and their trailing slashes)
292     * from the beginning of the path. Segments may be empty. This implies 
293     * that invoking this method with a path that starts with a
294     * slash, the first removed segment is the empty segment
295     * preceding the slash and the starting slash. Put differently, 
296     * invoking this method with an absolute path and 1 makes the path
297     * relative.
298     *
299     * @param path the path
300     * @param segments the number of segments to remove
301     * @return the result
302     */
303    public static String removeSegments(String path, int segments) {
304        return PathSpliterator.stream(path)
305            .skip(segments).collect(Collectors.joining("/"));
306    }
307
308    /**
309     * Splits the given path in a prefix with the given number of
310     * segments and the rest. Like {{@link #removeSegments(String, int)}
311     * but additionally returning the removed segments.
312     *
313     * @param path the path
314     * @param segments the number of segments in the prefi
315     * @return the prefix and the rest
316     */
317    @SuppressWarnings({ "PMD.AssignmentInOperand",
318        "PMD.AvoidLiteralsInIfCondition" })
319    public static String[] split(String path, int segments) {
320        StringBuilder prefix = new StringBuilder();
321        StringBuilder suffix = new StringBuilder();
322        int[] count = { 0 };
323        PathSpliterator.stream(path).forEach(seg -> {
324            if (count[0]++ < segments) {
325                if (count[0] > 1) {
326                    prefix.append('/');
327                }
328                prefix.append(seg);
329            } else {
330                if (count[0] > segments + 1) {
331                    suffix.append('/');
332                }
333                suffix.append(seg);
334            }
335        });
336        return new String[] { prefix.toString(), suffix.toString() };
337    }
338
339    /**
340     * If the URI matches, returns the path split according to 
341     * the matched pattern (see {@link #split(String, int)}).
342     *
343     * @param resource the resource
344     * @return the result.
345     */
346    public Optional<String[]> splitPath(URI resource) {
347        int matchRes = matches(resource);
348        if (matchRes < 0) {
349            return Optional.empty();
350        }
351        return Optional.of(split(resource.getPath(), matchRes + 1));
352    }
353
354    /**
355     * If the URI matches, returns the path without prefix as specified
356     * by the matched pattern (see {@link #split(String, int)}).
357     *
358     * @param resource the resource
359     * @return the result.
360     */
361    @SuppressWarnings("PMD.AssignmentInOperand")
362    public Optional<String> pathRemainder(URI resource) {
363        int matchRes = matches(resource);
364        if (matchRes < 0) {
365            return Optional.empty();
366        }
367        int segments = matchRes + 1;
368        StringBuilder suffix = new StringBuilder();
369        int[] count = { 0 };
370        PathSpliterator.stream(resource.getPath()).forEach(seg -> {
371            if (count[0]++ >= segments) {
372                if (count[0] > segments + 1) {
373                    suffix.append('/');
374                }
375                suffix.append(seg);
376            }
377        });
378        return Optional.of(suffix.toString());
379    }
380
381    /*
382     * (non-Javadoc)
383     * 
384     * @see java.lang.Object#toString()
385     */
386    @Override
387    public String toString() {
388        StringBuilder builder = new StringBuilder(30);
389        builder.append("ResourcePattern [")
390            .append(pattern)
391            .append(']');
392        return builder.toString();
393    }
394
395    /*
396     * (non-Javadoc)
397     * 
398     * @see java.lang.Object#hashCode()
399     */
400    @Override
401    @SuppressWarnings("PMD.DataflowAnomalyAnalysis")
402    public int hashCode() {
403        @SuppressWarnings("PMD.AvoidFinalLocalVariable")
404        final int prime = 31;
405        int result = 1;
406        result = prime * result + ((pattern == null) ? 0 : pattern.hashCode());
407        return result;
408    }
409
410    /*
411     * (non-Javadoc)
412     * 
413     * @see java.lang.Object#equals(java.lang.Object)
414     */
415    @Override
416    public boolean equals(Object obj) {
417        if (this == obj) {
418            return true;
419        }
420        if (obj == null) {
421            return false;
422        }
423        if (getClass() != obj.getClass()) {
424            return false;
425        }
426        ResourcePattern other = (ResourcePattern) obj;
427        if (pattern == null) {
428            if (other.pattern != null) {
429                return false;
430            }
431        } else if (!pattern.equals(other.pattern)) {
432            return false;
433        }
434        return true;
435    }
436
437    /**
438     * Returns the segments of the path. If the path starts with a slash,
439     * an empty string is returned as first segment. If the path ends 
440     * with a slash, an empty string is returned as final segment.
441     */
442    public static class PathSpliterator extends AbstractSpliterator<String> {
443        private StringTokenizer tokenizer;
444        private boolean pendingLeadingEmpty;
445        private boolean pendingTrailingEmpty;
446
447        /**
448         * Creates a new stream for the given path, using "/"
449         * as path separator.
450         * 
451         * @param path the path
452         */
453        public static Stream<String> stream(String path) {
454            return StreamSupport.stream(new PathSpliterator(path), false);
455        }
456
457        /**
458         * Creates a new stream for the given path, using 
459         * the characters from delimiters as seperators.
460         * 
461         * @param path the path
462         * @param delimiters the delimiters
463         */
464        public static Stream<String> stream(String path, String delimiters) {
465            return StreamSupport.stream(
466                new PathSpliterator(path, delimiters), false);
467        }
468
469        /**
470         * Creates a new spliterator for the given path, using "/"
471         * as path separator.
472         * 
473         * @param path the path
474         */
475        public PathSpliterator(String path) {
476            this(path, "/");
477        }
478
479        /**
480         * Creates a new spliterator for the given path, using 
481         * the characters from delimiters as seperators.
482         * 
483         * @param path the path
484         * @param delimiters the delimiters
485         */
486        public PathSpliterator(String path, String delimiters) {
487            super(Long.MAX_VALUE, Spliterator.ORDERED
488                | Spliterator.IMMUTABLE);
489            tokenizer = new StringTokenizer(path, delimiters);
490            pendingLeadingEmpty = path.startsWith("/");
491            pendingTrailingEmpty = path.endsWith("/");
492        }
493
494        /*
495         * (non-Javadoc)
496         * 
497         * @see java.util.Spliterator#tryAdvance(java.util.function.Consumer)
498         */
499        @Override
500        public boolean tryAdvance(Consumer<? super String> consumer) {
501            if (tokenizer == null) {
502                return false;
503            }
504            if (pendingLeadingEmpty) {
505                pendingLeadingEmpty = false;
506                consumer.accept("");
507                return true;
508            }
509            if (tokenizer.hasMoreTokens()) {
510                consumer.accept(tokenizer.nextToken());
511                return true;
512            }
513            tokenizer = null;
514            if (pendingTrailingEmpty) {
515                consumer.accept("");
516                return true;
517            }
518            return false;
519        }
520
521    }
522}