001/*
002 * This file is part of the JDrupes non-blocking HTTP Codec
003 * Copyright (C) 2016, 2017  Michael N. Lipp
004 *
005 * This program is free software; you can redistribute it and/or modify it 
006 * under the terms of the GNU Lesser General Public License as published
007 * by the Free Software Foundation; either version 3 of the License, or 
008 * (at your option) any later version.
009 *
010 * This program is distributed in the hope that it will be useful, but 
011 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
012 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 
013 * License for more details.
014 *
015 * You should have received a copy of the GNU Lesser General Public License along 
016 * with this program; if not, see <http://www.gnu.org/licenses/>.
017 */
018
019package org.jdrupes.httpcodec.protocols.http;
020
021import java.io.UnsupportedEncodingException;
022import java.nio.Buffer;
023import java.nio.ByteBuffer;
024import java.nio.CharBuffer;
025import java.nio.charset.Charset;
026import java.nio.charset.CoderResult;
027import java.text.ParseException;
028import java.util.Optional;
029import java.util.Stack;
030import java.util.function.BiConsumer;
031
032import org.jdrupes.httpcodec.Decoder;
033import org.jdrupes.httpcodec.Encoder;
034import org.jdrupes.httpcodec.MessageHeader;
035import org.jdrupes.httpcodec.ProtocolException;
036
037import static org.jdrupes.httpcodec.protocols.http.HttpConstants.*;
038
039import org.jdrupes.httpcodec.types.Converters;
040import org.jdrupes.httpcodec.types.CookieList;
041import org.jdrupes.httpcodec.types.MultiValueConverter;
042import org.jdrupes.httpcodec.types.StringList;
043import org.jdrupes.httpcodec.util.ByteBufferUtils;
044import org.jdrupes.httpcodec.util.DynamicByteArray;
045import org.jdrupes.httpcodec.util.OptimizedCharsetDecoder;
046
047
048/**
049 * Implements a decoder for HTTP. The class can be used as base class for both
050 * a request and a response decoder.
051 * 
052 * @param <T> the type of the message header to be decoded
053 * @param <R> the type of the response message header
054 */
055public abstract class   HttpDecoder<T extends HttpMessageHeader,
056        R extends HttpMessageHeader> 
057        extends HttpCodec<T> implements Decoder<T, R> {
058
059        protected static final String SP = "[ \\t]+";
060        protected static final String HTTP_VERSION = "HTTP/\\d+\\.\\d";
061
062        private enum State {
063            // Main states
064                AWAIT_MESSAGE_START, HEADER_LINE_RECEIVED, COPY_UNTIL_CLOSED, 
065                LENGTH_RECEIVED, CHUNK_START_RECEIVED, CHUNK_END_RECEIVED, 
066                CHUNK_TRAILER_LINE_RECEIVED, CLOSED,
067                // Sub states
068                RECEIVE_LINE, AWAIT_LINE_END, COPY_SPECIFIED, FINISH_CHARDECODER, 
069                FLUSH_CHARDECODER
070        }
071
072        protected enum BodyMode {
073                NO_BODY, CHUNKED, LENGTH, UNTIL_CLOSE
074        }
075
076        private long maxHeaderLength = 4194304;
077        private Stack<State> states = new Stack<>();
078        private DynamicByteArray lineBuilder = new DynamicByteArray(8192);
079        private String receivedLine;
080        private String headerLine = null;
081        protected HttpProtocol protocolVersion = HttpProtocol.HTTP_1_0;
082        private long headerLength = 0;
083        private T building;
084        private long leftToRead = 0;
085        private OptimizedCharsetDecoder charDecoder = null;
086        protected Encoder<R, T> peerEncoder; 
087
088        /**
089         * Creates a new decoder.
090         */
091        public HttpDecoder() {
092                states.push(State.AWAIT_MESSAGE_START);
093                states.push(State.RECEIVE_LINE);
094        }
095
096        public Decoder<T, R> setPeerEncoder(Encoder<R, T> encoder) {
097                peerEncoder = encoder;
098                return this;
099        }
100        
101        public boolean isAwaitingMessage() {
102                return states.size() > 0 
103                                && states.get(0) == State.AWAIT_MESSAGE_START;
104        }
105        
106        /**
107         * Returns the result factory for this codec.
108         * 
109         * @return the factory
110         */
111        protected abstract Result.Factory<R> resultFactory();
112        
113        /**
114         * Sets the maximum size for the complete header. If the size is exceeded, a
115         * {@link HttpProtocolException} will be thrown. The default size is 4MB
116         * (4194304 Byte).
117         * 
118         * @param maxHeaderLength
119         *            the maxHeaderLength to set
120         */
121        public void setMaxHeaderLength(long maxHeaderLength) {
122                this.maxHeaderLength = maxHeaderLength;
123        }
124
125        /**
126         * Returns the maximum header length.
127         * 
128         * @return the maxHeaderLength
129         */
130        public long maxHeaderLength() {
131                return maxHeaderLength;
132        }
133
134        /**
135         * Returns the message (header) if one exists.
136         * 
137         * @return the result
138         */
139        public Optional<T> header() {
140                return Optional.ofNullable(messageHeader);
141        }
142
143        /**
144         * Returns {@code true} if the decoder does not accept further input because
145         * the processed data indicated that the connection has been or is to be
146         * closed.
147         * 
148         * @return the result
149         */
150        public boolean isClosed() {
151                return states.peek() == State.CLOSED;
152        }
153
154        /**
155         * Informs the derived class about the start of a new message.
156         * 
157         * @param startLine
158         *            the start line (first line) of the message
159         * @return the new HttpMessage object that is to hold the decoded data
160         * @throws HttpProtocolException if the input violates the HTTP
161         */
162        protected abstract T newMessage(String startLine)
163                throws ProtocolException;
164
165        /**
166         * Informs the derived class that the header has been received completely.
167         * 
168         * @param message the message
169         * @return indication how the body will be transferred
170         * @throws HttpProtocolException if the input violates the HTTP
171         */
172        protected abstract BodyMode headerReceived(T message) 
173                        throws ProtocolException;
174
175        /**
176         * Informs the derived class that a complete message has been received
177         * and the given result will be returned. The derived class may take
178         * additional actions and even modify the result. The default
179         * implementation simply returns the given result.
180         */
181        protected Decoder.Result<R> messageComplete(Decoder.Result<R> result) {
182                return result;
183        }
184        
185        /**
186         * Decodes the next chunk of data.
187         * 
188         * @param in
189         *            holds the data to be decoded
190         * @param out
191         *            gets the body data (if any) written to it
192         * @param endOfInput
193         *            {@code true} if there is no input left beyond the data
194         *            currently in the {@code in} buffer (indicates end of body or
195         *            no body at all)
196         * @return the result
197         * @throws ProtocolException
198         *             if the message violates the Protocol
199         */
200        public Decoder.Result<R> decode(ByteBuffer in, Buffer out, 
201                        boolean endOfInput) throws ProtocolException {
202                try {
203                        try {
204                                return uncheckedDecode(in, out, endOfInput);
205                        } catch (ParseException | NumberFormatException e) {
206                                throw new HttpProtocolException(protocolVersion,
207                                        HttpStatus.BAD_REQUEST.statusCode(), e.getMessage());
208                        }
209                } catch (HttpProtocolException e) {
210                        states.clear();
211                        states.push(State.CLOSED);
212                        throw e;
213                }
214        }
215
216        private Decoder.Result<R> uncheckedDecode(
217                ByteBuffer in, Buffer out, boolean endOfInput)
218                        throws ProtocolException, ParseException {
219                while(true) {
220                        switch (states.peek()) {
221                        // Waiting for CR (start of end of line)
222                        case RECEIVE_LINE: {
223                                if (!in.hasRemaining()) {
224                                        return resultFactory().newResult(false, true);
225                                }
226                                byte ch = in.get();
227                                if (ch == '\r') {
228                                        states.pop();
229                                        states.push(State.AWAIT_LINE_END);
230                                        break;
231                                }
232                                lineBuilder.append(ch);
233                                // RFC 7230 3.2.5
234                                if (headerLength + lineBuilder.position() > maxHeaderLength) {
235                                        throw new HttpProtocolException(protocolVersion,
236                                                HttpStatus.BAD_REQUEST.statusCode(),
237                                                "Maximum header size exceeded");
238                                }
239                                break;
240                        }
241                        // Waiting for LF (confirmation of end of line)
242                        case AWAIT_LINE_END: {
243                                if (!in.hasRemaining()) {
244                                        return resultFactory().newResult(false, true);
245                                }
246                                char ch = (char) in.get();
247                                if (ch == '\n') {
248                                        try {
249                                                // RFC 7230 3.2.4
250                                                receivedLine = new String(lineBuilder.array(), 0,
251                                                        lineBuilder.position(), "iso-8859-1");
252                                        } catch (UnsupportedEncodingException e) {
253                                                // iso-8859-1 is guaranteed to be supported
254                                        }
255                                        lineBuilder.clear();
256                                        states.pop();
257                                        break;
258                                }
259                                throw new HttpProtocolException(protocolVersion,
260                                        HttpStatus.BAD_REQUEST.statusCode(),
261                                        "CR not followed by LF");
262                        }
263                        // Waiting for the initial request line
264                        case AWAIT_MESSAGE_START:
265                                if (receivedLine.isEmpty()) {
266                                        // Ignore as recommended by RFC2616/RFC7230
267                                        states.push(State.RECEIVE_LINE);
268                                        break;
269                                }
270                                building = newMessage(receivedLine);
271                                messageHeader = null;
272                                charDecoder = null;
273                                states.pop();
274                                headerLine = null;
275                                states.push(State.HEADER_LINE_RECEIVED);
276                                states.push(State.RECEIVE_LINE);
277                                break;
278
279                        case HEADER_LINE_RECEIVED:
280                                if (headerLine != null) {
281                                        // RFC 7230 3.2.4
282                                        if (!receivedLine.isEmpty()
283                                                && (receivedLine.charAt(0) == ' '
284                                                        || receivedLine.charAt(0) == '\t')) {
285                                                headerLine += (" " + receivedLine.substring(1));
286                                                states.push(State.RECEIVE_LINE);
287                                                break;
288                                        }
289                                        // Header line complete, evaluate
290                                        newHeaderLine();
291                                }
292                                if (receivedLine.isEmpty()) {
293                                        // Body starts
294                                        BodyMode bm = headerReceived(building);
295                                        adjustToBodyMode(bm);
296                                        messageHeader = building;
297                                        building = null;
298                                        if (!messageHeader.hasPayload()) {
299                                                return adjustToEndOfMessage();
300                                        }
301                                        if (out == null) {
302                                                return resultFactory().newResult(true, false);
303                                        }
304                                        break;
305                                }
306                                headerLine = receivedLine;
307                                states.push(State.RECEIVE_LINE);
308                                break;
309
310                        case LENGTH_RECEIVED:
311                                // We "drop" to this state after COPY_SPECIFIED
312                                // if we had a content length field
313                                if (out instanceof CharBuffer && charDecoder != null) {
314                                        states.push(State.FINISH_CHARDECODER);
315                                        break;
316                                }
317                                states.pop();
318                                return adjustToEndOfMessage();
319
320                        case CHUNK_START_RECEIVED:
321                                // We "drop" to this state when a line has been read
322                                String sizeText = receivedLine.split(";")[0];
323                                long chunkSize = Long.parseLong(sizeText, 16);
324                                if (chunkSize == 0) {
325                                        states.pop();
326                                        states.push(State.CHUNK_TRAILER_LINE_RECEIVED);
327                                        states.push(State.RECEIVE_LINE);
328                                        if (out instanceof CharBuffer && charDecoder != null) {
329                                                states.push(State.FINISH_CHARDECODER);
330                                        }
331                                        break;
332                                }
333                                leftToRead = chunkSize;
334                                // We expect the chunk data and the trailing CRLF (empty line)
335                                // (which must be skipped). In reverse order:
336                                states.push(State.CHUNK_END_RECEIVED);
337                                states.push(State.RECEIVE_LINE);
338                                states.push(State.COPY_SPECIFIED);
339                                break;
340
341                        case CHUNK_END_RECEIVED:
342                                // We "drop" to this state when the CR/LF after chunk data
343                                // has been read. There's nothing to do except to wait for
344                                // next chunk
345                                if (receivedLine.length() != 0) {
346                                        throw new HttpProtocolException(protocolVersion,
347                                                HttpStatus.BAD_REQUEST.statusCode(),
348                                                "No CRLF after chunk data.");
349                                }
350                                states.pop();
351                                states.push(State.CHUNK_START_RECEIVED);
352                                states.push(State.RECEIVE_LINE);
353                                break;
354
355                        case CHUNK_TRAILER_LINE_RECEIVED:
356                                // We "drop" to this state when a line has been read
357                                if (!receivedLine.isEmpty()) {
358                                        headerLine = receivedLine;
359                                        newTrailerLine();
360                                        states.push(State.RECEIVE_LINE);
361                                        break;
362                                }
363                                // All chunked data received
364                                return adjustToEndOfMessage();
365
366                        case COPY_SPECIFIED:
367                                // If we get here, leftToRead is greater zero.
368                                int initiallyRemaining = in.remaining();
369                                if (out == null) {
370                                        return resultFactory().newResult(true, initiallyRemaining <= 0);
371                                }
372                                CoderResult decRes;
373                                if (in.remaining() <= leftToRead) {
374                                        decRes = copyBodyData(out, in, in.remaining(), endOfInput);
375                                } else {
376                                        decRes = copyBodyData(
377                                                        out, in, (int) leftToRead, endOfInput);
378                                }
379                                leftToRead -= (initiallyRemaining - in.remaining());
380                                if (leftToRead == 0) {
381                                        // Everything written (except, maybe, final bytes 
382                                        // from decoder)
383                                        states.pop();
384                                        break;
385                                }
386                                return resultFactory().newResult(
387                                                (!out.hasRemaining() && in.hasRemaining())
388                                                || (decRes != null && decRes.isOverflow()),
389                                        !in.hasRemaining() 
390                                        || (decRes != null && decRes.isUnderflow()));
391
392                        case FINISH_CHARDECODER:
393                                if (charDecoder.decode(EMPTY_IN, (CharBuffer) out, true)
394                                        .isOverflow()) {
395                                        return resultFactory().newResult(true, false);
396                                }
397                                states.pop();
398                                states.push(State.FLUSH_CHARDECODER);
399                                break;
400                                
401                        case FLUSH_CHARDECODER:
402                                if (charDecoder.flush((CharBuffer)out).isOverflow()) {
403                                        return resultFactory().newResult(true, false);
404                                }
405                                // No longer needed (and no longer usable btw) 
406                                charDecoder = null;
407                                states.pop();
408                                break;
409
410                        case COPY_UNTIL_CLOSED:
411                                if (out == null) {
412                                        return resultFactory().newResult(true, false);
413                                }
414                                decRes = copyBodyData(out, in, in.remaining(), endOfInput);
415                                boolean overflow = (!out.hasRemaining() && in.hasRemaining())
416                                                || (decRes != null && decRes.isOverflow());
417                                if (overflow) {
418                                        return resultFactory().newResult(true, false);
419                                }
420                                if (!endOfInput) {
421                                        return resultFactory().newResult(false, true);
422                                }
423                                // Final input successfully processed.
424                                states.pop();
425                                states.push(State.CLOSED);
426                                if (out instanceof CharBuffer && charDecoder != null) {
427                                        // Final flush needed
428                                        states.push(State.FINISH_CHARDECODER);
429                                }
430                                break;
431
432                        case CLOSED:
433                                in.position(in.limit());
434                                return resultFactory().newResult(false, false);
435                        }
436                }
437        }
438
439        private void newHeaderLine() throws HttpProtocolException, ParseException {
440                headerLength += headerLine.length() + 2;
441                // RFC 7230 3.2
442                HttpField<?> field;
443                try {
444                        field = new HttpField<>(headerLine, Converters.STRING);
445                } catch (ParseException e) {
446                        throw new HttpProtocolException(protocolVersion,
447                                HttpStatus.BAD_REQUEST.statusCode(), "Invalid header");
448                }
449                if (field.name().equalsIgnoreCase(HttpField.SET_COOKIE)) {
450                        field = new HttpField<CookieList>(headerLine, Converters.SET_COOKIE);
451                }
452                switch (field.name()) {
453                case HttpField.CONTENT_LENGTH:
454                        // RFC 7230 3.3.3 (3.)
455                        if (building.fields()
456                                .containsKey(HttpField.TRANSFER_ENCODING)) {
457                                field = null;
458                                break;
459                        }
460                        // RFC 7230 3.3.3 (4.)
461                        Optional<HttpField<Long>> existing = building.findField(
462                                HttpField.CONTENT_LENGTH, Converters.LONG);
463                        if (existing.isPresent()) {
464                                @SuppressWarnings("unchecked")
465                                HttpField<Long> newLength = (HttpField<Long>)field;
466                                if (!existing.get().value().equals(newLength.value())) {
467                                        throw new HttpProtocolException(protocolVersion,
468                                                        HttpStatus.BAD_REQUEST);
469                                }
470                        }
471                        break;
472                case HttpField.TRANSFER_ENCODING:
473                        // RFC 7230 3.3.3 (3.)
474                        building.removeField(HttpField.CONTENT_LENGTH);
475                        break;
476                }
477                if (field == null) {
478                        return;
479                }
480                addHeaderField(building, field);
481        }
482
483        private void newTrailerLine() throws HttpProtocolException, ParseException {
484                headerLength += headerLine.length() + 2;
485                // RFC 7230 3.2
486                HttpField<?> field;
487                try {
488                        field = new HttpField<>(headerLine, Converters.STRING);
489                } catch (ParseException e) {
490                        throw new HttpProtocolException(protocolVersion,
491                                HttpStatus.BAD_REQUEST.statusCode(), "Invalid header");
492                }
493                // RFC 7230 4.4
494                HttpField<StringList> trailerField = messageHeader
495                        .computeIfAbsent(HttpField.TRAILER, Converters.STRING_LIST,
496                                        StringList::new);
497                if (!trailerField.value().containsIgnoreCase(field.name())) {
498                        trailerField.value().add(field.name());
499                }
500                addHeaderField(messageHeader, field);
501        }
502
503        private void addHeaderField(T header, HttpField<?> field)
504                throws HttpProtocolException, ParseException {
505                // RFC 7230 3.2.2
506                HttpField<?> existing = header.fields().get(field.name());
507                if (existing != null) {
508                        if (!(existing.converter() instanceof MultiValueConverter)
509                                        || !existing.converter().equals(field.converter())) {
510                                throw new HttpProtocolException(protocolVersion,
511                                        HttpStatus.BAD_REQUEST.statusCode(),
512                                        "Multiple occurences of field " + field.name());
513                        }
514                        @SuppressWarnings("unchecked")
515                        BiConsumer<Iterable<Object>, Object> adder 
516                                = ((MultiValueConverter<Iterable<Object>, Object>)
517                                                existing.converter()).valueAdder();
518                        @SuppressWarnings("unchecked")
519                        Iterable<Object> source = (Iterable<Object>)field.value();
520                        @SuppressWarnings("unchecked")
521                        Iterable<Object> target = (Iterable<Object>)existing.value();
522                        source.forEach(item -> adder.accept(target, item));
523                } else {
524                        header.setField(field);
525                }
526        }
527
528        private void adjustToBodyMode(BodyMode bm) {
529                states.pop();
530                switch (bm) {
531                case UNTIL_CLOSE:
532                        states.push(State.COPY_UNTIL_CLOSED);
533                        building.setHasPayload(true);
534                        break;
535                case CHUNKED:
536                        states.push(State.CHUNK_START_RECEIVED);
537                        states.push(State.RECEIVE_LINE);
538                        building.setHasPayload(true);
539                        break;
540                case LENGTH:
541                        HttpField<Long> clf = building.findField(
542                                HttpField.CONTENT_LENGTH, Converters.LONG).get();
543                        leftToRead = clf.value();
544                        if (leftToRead > 0) {
545                                states.push(State.LENGTH_RECEIVED);
546                                states.push(State.COPY_SPECIFIED);
547                                building.setHasPayload(true);
548                                break;
549                        }
550                        // Length == 0 means no body, fall through
551                case NO_BODY:
552                        building.setHasPayload(false);
553                        break;
554                }
555        }
556
557        private CoderResult copyBodyData(
558                        Buffer out, ByteBuffer in, int limit, boolean endOfInput) {
559                if (out instanceof ByteBuffer) {
560                        ByteBufferUtils.putAsMuchAsPossible((ByteBuffer) out, in, limit);
561                        return null;
562                } else if (out instanceof CharBuffer) {
563                        if (charDecoder == null) {
564                                charDecoder = new OptimizedCharsetDecoder(
565                                        Charset.forName(bodyCharset()).newDecoder());
566                        }
567                        int oldLimit = in.limit();
568                        try {
569                                if (in.remaining() > limit) {
570                                        in.limit(in.position() + limit);
571                                }
572                                return charDecoder.decode(in, (CharBuffer)out, endOfInput);
573                        } finally {
574                                in.limit(oldLimit);
575                        }
576                } else {
577                        throw new IllegalArgumentException(
578                                "Only Byte- or CharBuffer are allowed.");
579                }
580        }
581
582        private Decoder.Result<R> adjustToEndOfMessage() {
583                // RFC 7230 6.3
584                Optional<HttpField<StringList>> connection = messageHeader
585                                .findField(HttpField.CONNECTION, Converters.STRING_LIST);
586                if (connection.isPresent() && connection.get().value()
587                                .stream().anyMatch(s -> s.equalsIgnoreCase("close"))) {
588                        states.push(State.CLOSED);
589                        return messageComplete(resultFactory().newResult(false, false));
590                }
591                if (messageHeader.protocol().compareTo(HttpProtocol.HTTP_1_1) >= 0) {
592                        states.push(State.AWAIT_MESSAGE_START);
593                        states.push(State.RECEIVE_LINE);
594                        return messageComplete(resultFactory().newResult(false, false));
595                }
596                states.push(State.CLOSED);
597                return messageComplete(resultFactory().newResult(false, false));
598        }
599        
600        /**
601         * Results from {@link HttpDecoder} add no additional
602         * information to {@link org.jdrupes.httpcodec.Decoder.Result}. This
603         * class provides only a factory for creating 
604         * the results as required by {@link HttpDecoder}.
605         * 
606         * @param <R> the type of the response message header
607         */
608        public static class Result<R extends MessageHeader>
609                extends Decoder.Result<R> {
610
611                public Result(boolean overflow, boolean underflow,
612                        boolean closeConnection, boolean headerCompleted, R response,
613                        boolean responseOnly) {
614                        super(overflow, underflow, closeConnection, headerCompleted, response,
615                                responseOnly);
616                }
617
618                /**
619                 * A factory for creating new Results.
620                 */
621                protected abstract static class Factory<R extends MessageHeader> 
622                        extends Decoder.Result.Factory<R> {
623                        
624                        /**
625                         * Create a new result. Implementing classes can
626                         * obtain the value for 
627                         * {@link org.jdrupes.httpcodec.Codec.Result#closeConnection()}
628                         * from {@link HttpDecoder#isClosed()}.
629                         * 
630                         * @param overflow
631                         *            {@code true} if the data didn't fit in the out buffer
632                         * @param underflow
633                         *            {@code true} if more data is expected
634                         * @return the result
635                         */
636                        protected abstract Result<R> newResult(
637                                boolean overflow, boolean underflow);
638                }               
639        }
640}