/*
 * This file is part of the JDrupes non-blocking HTTP Codec
 * Copyright (C) 2016, 2017  Michael N. Lipp
 *
 * This program is free software; you can redistribute it and/or modify it 
 * under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation; either version 3 of the License, or 
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 
 * License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License along 
 * with this program; if not, see <http://www.gnu.org/licenses/>.
 */

package org.jdrupes.httpcodec.protocols.http;

import java.io.UnsupportedEncodingException;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CoderResult;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.Optional;
import java.util.Stack;

import org.jdrupes.httpcodec.Decoder;
import org.jdrupes.httpcodec.Encoder;
import org.jdrupes.httpcodec.MessageHeader;
import org.jdrupes.httpcodec.ProtocolException;

import static org.jdrupes.httpcodec.protocols.http.HttpConstants.*;

import org.jdrupes.httpcodec.types.Converters;
import org.jdrupes.httpcodec.types.CookieList;
import org.jdrupes.httpcodec.types.MultiValueConverter;
import org.jdrupes.httpcodec.types.StringList;
import org.jdrupes.httpcodec.util.ByteBufferUtils;
import org.jdrupes.httpcodec.util.DynamicByteArray;
import org.jdrupes.httpcodec.util.OptimizedCharsetDecoder;

046/**
047 * Implements a decoder for HTTP. The class can be used as base class for both
048 * a request and a response decoder.
049 * 
050 * @param <T> the type of the message header to be decoded
051 * @param <R> the type of the response message header
052 */
053public abstract class HttpDecoder<T extends HttpMessageHeader,
054        R extends HttpMessageHeader>
055        extends HttpCodec<T> implements Decoder<T, R> {
056
057    protected static final String SP = "[ \\t]+";
058    protected static final String HTTP_VERSION = "HTTP/\\d+\\.\\d";
059
060    private enum State {
061        // Main states
062        AWAIT_MESSAGE_START, HEADER_LINE_RECEIVED, COPY_UNTIL_CLOSED,
063        LENGTH_RECEIVED, CHUNK_START_RECEIVED, CHUNK_END_RECEIVED,
064        CHUNK_TRAILER_LINE_RECEIVED, CLOSED,
065        // Sub states
066        RECEIVE_LINE, AWAIT_LINE_END, COPY_SPECIFIED, FINISH_CHARDECODER,
067        FLUSH_CHARDECODER
068    }
069
070    protected enum BodyMode {
071        NO_BODY, CHUNKED, LENGTH, UNTIL_CLOSE
072    }
073
074    private long maxHeaderLength = 4194304;
075    private Stack<State> states = new Stack<>();
076    private DynamicByteArray lineBuilder = new DynamicByteArray(8192);
077    private String receivedLine;
078    private String headerLine = null;
079    protected HttpProtocol protocolVersion = HttpProtocol.HTTP_1_0;
080    private long headerLength = 0;
081    private T building;
082    private long leftToRead = 0;
083    private OptimizedCharsetDecoder charDecoder = null;
084    protected Encoder<R, T> peerEncoder;
085
086    /**
087     * Creates a new decoder.
088     */
089    public HttpDecoder() {
090        states.push(State.AWAIT_MESSAGE_START);
091        states.push(State.RECEIVE_LINE);
092    }
093
094    public Decoder<T, R> setPeerEncoder(Encoder<R, T> encoder) {
095        peerEncoder = encoder;
096        return this;
097    }
098
099    public boolean isAwaitingMessage() {
100        return states.size() > 0
101            && states.get(0) == State.AWAIT_MESSAGE_START;
102    }
103
104    /**
105     * Returns the result factory for this codec.
106     * 
107     * @return the factory
108     */
109    protected abstract Result.Factory<R> resultFactory();
110
111    /**
112     * Sets the maximum size for the complete header. If the size is exceeded, a
113     * {@link HttpProtocolException} will be thrown. The default size is 4MB
114     * (4194304 Byte).
115     * 
116     * @param maxHeaderLength
117     *            the maxHeaderLength to set
118     */
119    public void setMaxHeaderLength(long maxHeaderLength) {
120        this.maxHeaderLength = maxHeaderLength;
121    }
122
123    /**
124     * Returns the maximum header length.
125     * 
126     * @return the maxHeaderLength
127     */
128    public long maxHeaderLength() {
129        return maxHeaderLength;
130    }
131
132    /**
133     * Returns the message (header) if one exists.
134     * 
135     * @return the result
136     */
137    public Optional<T> header() {
138        return Optional.ofNullable(messageHeader);
139    }
140
141    /**
142     * Returns {@code true} if the decoder does not accept further input because
143     * the processed data indicated that the connection has been or is to be
144     * closed.
145     * 
146     * @return the result
147     */
148    public boolean isClosed() {
149        return states.peek() == State.CLOSED;
150    }
151
152    /**
153     * Informs the derived class about the start of a new message.
154     * 
155     * @param startLine
156     *            the start line (first line) of the message
157     * @return the new HttpMessage object that is to hold the decoded data
158     * @throws HttpProtocolException if the input violates the HTTP
159     */
160    protected abstract T newMessage(String startLine)
161            throws ProtocolException;
162
163    /**
164     * Informs the derived class that the header has been received completely.
165     * 
166     * @param message the message
167     * @return indication how the body will be transferred
168     * @throws HttpProtocolException if the input violates the HTTP
169     */
170    protected abstract BodyMode headerReceived(T message)
171            throws ProtocolException;
172
173    /**
174     * Informs the derived class that a complete message has been received
175     * and the given result will be returned. The derived class may take
176     * additional actions and even modify the result. The default
177     * implementation simply returns the given result.
178     */
179    protected Decoder.Result<R> messageComplete(Decoder.Result<R> result) {
180        return result;
181    }
182
183    /**
184     * Decodes the next chunk of data.
185     * 
186     * @param in
187     *            holds the data to be decoded
188     * @param out
189     *            gets the body data (if any) written to it
190     * @param endOfInput
191     *            {@code true} if there is no input left beyond the data
192     *            currently in the {@code in} buffer (indicates end of body or
193     *            no body at all)
194     * @return the result
195     * @throws ProtocolException
196     *             if the message violates the Protocol
197     */
198    public Decoder.Result<R> decode(ByteBuffer in, Buffer out,
199            boolean endOfInput) throws ProtocolException {
200        try {
201            try {
202                return uncheckedDecode(in, out, endOfInput);
203            } catch (ParseException | NumberFormatException e) {
204                throw new HttpProtocolException(protocolVersion,
205                    HttpStatus.BAD_REQUEST.statusCode(), e.getMessage());
206            }
207        } catch (HttpProtocolException e) {
208            states.clear();
209            states.push(State.CLOSED);
210            throw e;
211        }
212    }
213
214    private Decoder.Result<R> uncheckedDecode(
215            ByteBuffer in, Buffer out, boolean endOfInput)
216            throws ProtocolException, ParseException {
217        while (true) {
218            switch (states.peek()) {
219            // Waiting for CR (start of end of line)
220            case RECEIVE_LINE: {
221                if (!in.hasRemaining()) {
222                    return resultFactory().newResult(false, true);
223                }
224                byte ch = in.get();
225                if (ch == '\r') {
226                    states.pop();
227                    states.push(State.AWAIT_LINE_END);
228                    break;
229                }
230                lineBuilder.append(ch);
231                // RFC 7230 3.2.5
232                if (headerLength + lineBuilder.position() > maxHeaderLength) {
233                    throw new HttpProtocolException(protocolVersion,
234                        HttpStatus.BAD_REQUEST.statusCode(),
235                        "Maximum header size exceeded");
236                }
237                break;
238            }
239            // Waiting for LF (confirmation of end of line)
240            case AWAIT_LINE_END: {
241                if (!in.hasRemaining()) {
242                    return resultFactory().newResult(false, true);
243                }
244                char ch = (char) in.get();
245                if (ch == '\n') {
246                    try {
247                        // RFC 7230 3.2.4
248                        receivedLine = new String(lineBuilder.array(), 0,
249                            lineBuilder.position(), "iso-8859-1");
250                    } catch (UnsupportedEncodingException e) {
251                        // iso-8859-1 is guaranteed to be supported
252                    }
253                    lineBuilder.clear();
254                    states.pop();
255                    break;
256                }
257                throw new HttpProtocolException(protocolVersion,
258                    HttpStatus.BAD_REQUEST.statusCode(),
259                    "CR not followed by LF");
260            }
261            // Waiting for the initial request line
262            case AWAIT_MESSAGE_START:
263                if (receivedLine.isEmpty()) {
264                    // Ignore as recommended by RFC2616/RFC7230
265                    states.push(State.RECEIVE_LINE);
266                    break;
267                }
268                building = newMessage(receivedLine);
269                messageHeader = null;
270                charDecoder = null;
271                states.pop();
272                headerLine = null;
273                states.push(State.HEADER_LINE_RECEIVED);
274                states.push(State.RECEIVE_LINE);
275                break;
276
277            case HEADER_LINE_RECEIVED:
278                if (headerLine != null) {
279                    // RFC 7230 3.2.4
280                    if (!receivedLine.isEmpty()
281                        && (receivedLine.charAt(0) == ' '
282                            || receivedLine.charAt(0) == '\t')) {
283                        headerLine += (" " + receivedLine.substring(1));
284                        states.push(State.RECEIVE_LINE);
285                        break;
286                    }
287                    // Header line complete, evaluate
288                    newHeaderLine();
289                }
290                if (receivedLine.isEmpty()) {
291                    // Body starts
292                    BodyMode bm = headerReceived(building);
293                    adjustToBodyMode(bm);
294                    messageHeader = building;
295                    building = null;
296                    if (!messageHeader.hasPayload()) {
297                        return adjustToEndOfMessage();
298                    }
299                    if (out == null) {
300                        return resultFactory().newResult(true, false);
301                    }
302                    break;
303                }
304                headerLine = receivedLine;
305                states.push(State.RECEIVE_LINE);
306                break;
307
308            case LENGTH_RECEIVED:
309                // We "drop" to this state after COPY_SPECIFIED
310                // if we had a content length field
311                if (out instanceof CharBuffer && charDecoder != null) {
312                    states.push(State.FINISH_CHARDECODER);
313                    break;
314                }
315                states.pop();
316                return adjustToEndOfMessage();
317
318            case CHUNK_START_RECEIVED:
319                // We "drop" to this state when a line has been read
320                String sizeText = receivedLine.split(";")[0];
321                long chunkSize = Long.parseLong(sizeText, 16);
322                if (chunkSize == 0) {
323                    states.pop();
324                    states.push(State.CHUNK_TRAILER_LINE_RECEIVED);
325                    states.push(State.RECEIVE_LINE);
326                    if (out instanceof CharBuffer && charDecoder != null) {
327                        states.push(State.FINISH_CHARDECODER);
328                    }
329                    break;
330                }
331                leftToRead = chunkSize;
332                // We expect the chunk data and the trailing CRLF (empty line)
333                // (which must be skipped). In reverse order:
334                states.push(State.CHUNK_END_RECEIVED);
335                states.push(State.RECEIVE_LINE);
336                states.push(State.COPY_SPECIFIED);
337                break;
338
339            case CHUNK_END_RECEIVED:
340                // We "drop" to this state when the CR/LF after chunk data
341                // has been read. There's nothing to do except to wait for
342                // next chunk
343                if (receivedLine.length() != 0) {
344                    throw new HttpProtocolException(protocolVersion,
345                        HttpStatus.BAD_REQUEST.statusCode(),
346                        "No CRLF after chunk data.");
347                }
348                states.pop();
349                states.push(State.CHUNK_START_RECEIVED);
350                states.push(State.RECEIVE_LINE);
351                break;
352
353            case CHUNK_TRAILER_LINE_RECEIVED:
354                // We "drop" to this state when a line has been read
355                if (!receivedLine.isEmpty()) {
356                    headerLine = receivedLine;
357                    newTrailerLine();
358                    states.push(State.RECEIVE_LINE);
359                    break;
360                }
361                // All chunked data received
362                return adjustToEndOfMessage();
363
364            case COPY_SPECIFIED:
365                // If we get here, leftToRead is greater zero.
366                int initiallyRemaining = in.remaining();
367                if (out == null) {
368                    return resultFactory().newResult(true,
369                        initiallyRemaining <= 0);
370                }
371                CoderResult decRes;
372                if (in.remaining() <= leftToRead) {
373                    decRes = copyBodyData(out, in, in.remaining(), endOfInput);
374                } else {
375                    decRes = copyBodyData(
376                        out, in, (int) leftToRead, endOfInput);
377                }
378                leftToRead -= (initiallyRemaining - in.remaining());
379                if (leftToRead == 0) {
380                    // Everything written (except, maybe, final bytes
381                    // from decoder)
382                    states.pop();
383                    break;
384                }
385                return resultFactory().newResult(
386                    (!out.hasRemaining() && in.hasRemaining())
387                        || (decRes != null && decRes.isOverflow()),
388                    !in.hasRemaining()
389                        || (decRes != null && decRes.isUnderflow()));
390
391            case FINISH_CHARDECODER:
392                if (charDecoder.decode(EMPTY_IN, (CharBuffer) out, true)
393                    .isOverflow()) {
394                    return resultFactory().newResult(true, false);
395                }
396                states.pop();
397                states.push(State.FLUSH_CHARDECODER);
398                break;
399
400            case FLUSH_CHARDECODER:
401                if (charDecoder.flush((CharBuffer) out).isOverflow()) {
402                    return resultFactory().newResult(true, false);
403                }
404                // No longer needed (and no longer usable btw)
405                charDecoder = null;
406                states.pop();
407                break;
408
409            case COPY_UNTIL_CLOSED:
410                if (out == null) {
411                    return resultFactory().newResult(true, false);
412                }
413                decRes = copyBodyData(out, in, in.remaining(), endOfInput);
414                boolean overflow = (!out.hasRemaining() && in.hasRemaining())
415                    || (decRes != null && decRes.isOverflow());
416                if (overflow) {
417                    return resultFactory().newResult(true, false);
418                }
419                if (!endOfInput) {
420                    return resultFactory().newResult(false, true);
421                }
422                // Final input successfully processed.
423                states.pop();
424                states.push(State.CLOSED);
425                if (out instanceof CharBuffer && charDecoder != null) {
426                    // Final flush needed
427                    states.push(State.FINISH_CHARDECODER);
428                }
429                break;
430
431            case CLOSED:
432                in.position(in.limit());
433                return resultFactory().newResult(false, false);
434            }
435        }
436    }
437
438    private void newHeaderLine() throws HttpProtocolException, ParseException {
439        headerLength += headerLine.length() + 2;
440        // RFC 7230 3.2
441        HttpField<?> field;
442        try {
443            field = new HttpField<>(headerLine, Converters.STRING);
444        } catch (ParseException e) {
445            throw new HttpProtocolException(protocolVersion,
446                HttpStatus.BAD_REQUEST.statusCode(), "Invalid header");
447        }
448        if (field.name().equalsIgnoreCase(HttpField.SET_COOKIE)) {
449            field
450                = new HttpField<CookieList>(headerLine, Converters.SET_COOKIE);
451        }
452        switch (field.name()) {
453        case HttpField.CONTENT_LENGTH:
454            // RFC 7230 3.3.3 (3.)
455            if (building.fields()
456                .containsKey(HttpField.TRANSFER_ENCODING)) {
457                field = null;
458                break;
459            }
460            // RFC 7230 3.3.3 (4.)
461            Optional<HttpField<Long>> existing = building.findField(
462                HttpField.CONTENT_LENGTH, Converters.LONG);
463            if (existing.isPresent()) {
464                @SuppressWarnings("unchecked")
465                HttpField<Long> newLength = (HttpField<Long>) field;
466                if (!existing.get().value().equals(newLength.value())) {
467                    throw new HttpProtocolException(protocolVersion,
468                        HttpStatus.BAD_REQUEST);
469                }
470            }
471            break;
472        case HttpField.TRANSFER_ENCODING:
473            // RFC 7230 3.3.3 (3.)
474            building.removeField(HttpField.CONTENT_LENGTH);
475            break;
476        }
477        if (field == null) {
478            return;
479        }
480        addHeaderField(building, field);
481    }
482
483    private void newTrailerLine() throws HttpProtocolException, ParseException {
484        headerLength += headerLine.length() + 2;
485        // RFC 7230 3.2
486        HttpField<?> field;
487        try {
488            field = new HttpField<>(headerLine, Converters.STRING);
489        } catch (ParseException e) {
490            throw new HttpProtocolException(protocolVersion,
491                HttpStatus.BAD_REQUEST.statusCode(), "Invalid header");
492        }
493        // RFC 7230 4.4
494        HttpField<StringList> trailerField = messageHeader
495            .computeIfAbsent(HttpField.TRAILER, Converters.STRING_LIST,
496                StringList::new);
497        if (!trailerField.value().containsIgnoreCase(field.name())) {
498            trailerField.value().add(field.name());
499        }
500        addHeaderField(messageHeader, field);
501    }
502
503    private void addHeaderField(T header, HttpField<?> field)
504            throws HttpProtocolException, ParseException {
505        // RFC 7230 3.2.2
506        var exists = header.findField(field.name(),
507            HttpField.lookupConverter(field.name()));
508        if (exists.isPresent()) {
509            var existing = exists.get();
510            // Duplicate field name is only allowed for value lists
511            if (!(existing.converter() instanceof MultiValueConverter)) {
512                throw new HttpProtocolException(protocolVersion,
513                    HttpStatus.BAD_REQUEST.statusCode(),
514                    "Multiple occurences of single value field "
515                        + field.name());
516            }
517            @SuppressWarnings("unchecked")
518            var converter = (MultiValueConverter<Iterable<Object>,
519                    Object>) existing.converter();
520            HttpField<?> srcField = field;
521            if (field.converter().equals(Converters.STRING)) {
522                // Still default (String), use real converter (same as existing)
523                var converted = new HttpField<>(field.name(),
524                    converter.fromFieldValue((String) field.asFieldValue()),
525                    converter);
526                srcField = converted;
527            }
528            var adder = converter.valueAdder();
529            @SuppressWarnings("unchecked")
530            Iterable<Object> source = (Iterable<Object>) srcField.value();
531            @SuppressWarnings("unchecked")
532            Iterable<Object> target = (Iterable<Object>) existing.value();
533            source.forEach(item -> adder.accept(target, item));
534        } else {
535            header.setField(field);
536        }
537    }
538
539    private void adjustToBodyMode(BodyMode bm) {
540        states.pop();
541        switch (bm) {
542        case UNTIL_CLOSE:
543            states.push(State.COPY_UNTIL_CLOSED);
544            building.setHasPayload(true);
545            break;
546        case CHUNKED:
547            states.push(State.CHUNK_START_RECEIVED);
548            states.push(State.RECEIVE_LINE);
549            building.setHasPayload(true);
550            break;
551        case LENGTH:
552            HttpField<Long> clf = building.findField(
553                HttpField.CONTENT_LENGTH, Converters.LONG).get();
554            leftToRead = clf.value();
555            if (leftToRead > 0) {
556                states.push(State.LENGTH_RECEIVED);
557                states.push(State.COPY_SPECIFIED);
558                building.setHasPayload(true);
559                break;
560            }
561            // Length == 0 means no body, fall through
562        case NO_BODY:
563            building.setHasPayload(false);
564            break;
565        }
566    }
567
568    private CoderResult copyBodyData(
569            Buffer out, ByteBuffer in, int limit, boolean endOfInput) {
570        if (out instanceof ByteBuffer) {
571            ByteBufferUtils.putAsMuchAsPossible((ByteBuffer) out, in, limit);
572            return null;
573        } else if (out instanceof CharBuffer) {
574            if (charDecoder == null) {
575                charDecoder = new OptimizedCharsetDecoder(
576                    Charset.forName(bodyCharset()).newDecoder());
577            }
578            int oldLimit = in.limit();
579            try {
580                if (in.remaining() > limit) {
581                    in.limit(in.position() + limit);
582                }
583                return charDecoder.decode(in, (CharBuffer) out, endOfInput);
584            } finally {
585                in.limit(oldLimit);
586            }
587        } else {
588            throw new IllegalArgumentException(
589                "Only Byte- or CharBuffer are allowed.");
590        }
591    }
592
593    private Decoder.Result<R> adjustToEndOfMessage() {
594        // RFC 7230 6.3
595        Optional<HttpField<StringList>> connection = messageHeader
596            .findField(HttpField.CONNECTION, Converters.STRING_LIST);
597        if (connection.isPresent() && connection.get().value()
598            .stream().anyMatch(s -> s.equalsIgnoreCase("close"))) {
599            states.push(State.CLOSED);
600            return messageComplete(resultFactory().newResult(false, false));
601        }
602        if (messageHeader.protocol().compareTo(HttpProtocol.HTTP_1_1) >= 0) {
603            states.push(State.AWAIT_MESSAGE_START);
604            states.push(State.RECEIVE_LINE);
605            return messageComplete(resultFactory().newResult(false, false));
606        }
607        states.push(State.CLOSED);
608        return messageComplete(resultFactory().newResult(false, false));
609    }
610
611    /**
612     * Results from {@link HttpDecoder} add no additional
613     * information to {@link org.jdrupes.httpcodec.Decoder.Result}. This
614     * class provides only a factory for creating 
615     * the results as required by {@link HttpDecoder}.
616     * 
617     * @param <R> the type of the response message header
618     */
619    public static class Result<R extends MessageHeader>
620            extends Decoder.Result<R> {
621
622        public Result(boolean overflow, boolean underflow,
623                boolean closeConnection, boolean headerCompleted, R response,
624                boolean responseOnly) {
625            super(overflow, underflow, closeConnection, headerCompleted,
626                response,
627                responseOnly);
628        }
629
630        /**
631         * A factory for creating new Results.
632         */
633        protected abstract static class Factory<R extends MessageHeader>
634                extends Decoder.Result.Factory<R> {
635
636            /**
637             * Create a new result. Implementing classes can
638             * obtain the value for 
639             * {@link org.jdrupes.httpcodec.Codec.Result#closeConnection()}
640             * from {@link HttpDecoder#isClosed()}.
641             * 
642             * @param overflow
643             *            {@code true} if the data didn't fit in the out buffer
644             * @param underflow
645             *            {@code true} if more data is expected
646             * @return the result
647             */
648            protected abstract Result<R> newResult(
649                    boolean overflow, boolean underflow);
650        }
651    }
652}