001/* 002 * This file is part of the JDrupes non-blocking HTTP Codec 003 * Copyright (C) 2016, 2017 Michael N. Lipp 004 * 005 * This program is free software; you can redistribute it and/or modify it 006 * under the terms of the GNU Lesser General Public License as published 007 * by the Free Software Foundation; either version 3 of the License, or 008 * (at your option) any later version. 009 * 010 * This program is distributed in the hope that it will be useful, but 011 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 012 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 013 * License for more details. 014 * 015 * You should have received a copy of the GNU Lesser General Public License along 016 * with this program; if not, see <http://www.gnu.org/licenses/>. 017 */ 018 019package org.jdrupes.httpcodec.protocols.http; 020 021import java.io.UnsupportedEncodingException; 022import java.nio.Buffer; 023import java.nio.ByteBuffer; 024import java.nio.CharBuffer; 025import java.nio.charset.Charset; 026import java.nio.charset.CoderResult; 027import java.text.ParseException; 028import java.util.Optional; 029import java.util.Stack; 030 031import org.jdrupes.httpcodec.Decoder; 032import org.jdrupes.httpcodec.Encoder; 033import org.jdrupes.httpcodec.MessageHeader; 034import org.jdrupes.httpcodec.ProtocolException; 035 036import static org.jdrupes.httpcodec.protocols.http.HttpConstants.*; 037 038import org.jdrupes.httpcodec.types.Converters; 039import org.jdrupes.httpcodec.types.CookieList; 040import org.jdrupes.httpcodec.types.MultiValueConverter; 041import org.jdrupes.httpcodec.types.StringList; 042import org.jdrupes.httpcodec.util.ByteBufferUtils; 043import org.jdrupes.httpcodec.util.DynamicByteArray; 044import org.jdrupes.httpcodec.util.OptimizedCharsetDecoder; 045 046/** 047 * Implements a decoder for HTTP. The class can be used as base class for both 048 * a request and a response decoder. 049 * 050 * @param <T> the type of the message header to be decoded 051 * @param <R> the type of the response message header 052 */ 053public abstract class HttpDecoder<T extends HttpMessageHeader, 054 R extends HttpMessageHeader> 055 extends HttpCodec<T> implements Decoder<T, R> { 056 057 protected static final String SP = "[ \\t]+"; 058 protected static final String HTTP_VERSION = "HTTP/\\d+\\.\\d"; 059 060 private enum State { 061 // Main states 062 AWAIT_MESSAGE_START, HEADER_LINE_RECEIVED, COPY_UNTIL_CLOSED, 063 LENGTH_RECEIVED, CHUNK_START_RECEIVED, CHUNK_END_RECEIVED, 064 CHUNK_TRAILER_LINE_RECEIVED, CLOSED, 065 // Sub states 066 RECEIVE_LINE, AWAIT_LINE_END, COPY_SPECIFIED, FINISH_CHARDECODER, 067 FLUSH_CHARDECODER 068 } 069 070 protected enum BodyMode { 071 NO_BODY, CHUNKED, LENGTH, UNTIL_CLOSE 072 } 073 074 private long maxHeaderLength = 4194304; 075 private Stack<State> states = new Stack<>(); 076 private DynamicByteArray lineBuilder = new DynamicByteArray(8192); 077 private String receivedLine; 078 private String headerLine = null; 079 protected HttpProtocol protocolVersion = HttpProtocol.HTTP_1_0; 080 private long headerLength = 0; 081 private T building; 082 private long leftToRead = 0; 083 private OptimizedCharsetDecoder charDecoder = null; 084 protected Encoder<R, T> peerEncoder; 085 086 /** 087 * Creates a new decoder. 088 */ 089 public HttpDecoder() { 090 states.push(State.AWAIT_MESSAGE_START); 091 states.push(State.RECEIVE_LINE); 092 } 093 094 public Decoder<T, R> setPeerEncoder(Encoder<R, T> encoder) { 095 peerEncoder = encoder; 096 return this; 097 } 098 099 public boolean isAwaitingMessage() { 100 return states.size() > 0 101 && states.get(0) == State.AWAIT_MESSAGE_START; 102 } 103 104 /** 105 * Returns the result factory for this codec. 106 * 107 * @return the factory 108 */ 109 protected abstract Result.Factory<R> resultFactory(); 110 111 /** 112 * Sets the maximum size for the complete header. If the size is exceeded, a 113 * {@link HttpProtocolException} will be thrown. The default size is 4MB 114 * (4194304 Byte). 115 * 116 * @param maxHeaderLength 117 * the maxHeaderLength to set 118 */ 119 public void setMaxHeaderLength(long maxHeaderLength) { 120 this.maxHeaderLength = maxHeaderLength; 121 } 122 123 /** 124 * Returns the maximum header length. 125 * 126 * @return the maxHeaderLength 127 */ 128 public long maxHeaderLength() { 129 return maxHeaderLength; 130 } 131 132 /** 133 * Returns the message (header) if one exists. 134 * 135 * @return the result 136 */ 137 public Optional<T> header() { 138 return Optional.ofNullable(messageHeader); 139 } 140 141 /** 142 * Returns {@code true} if the decoder does not accept further input because 143 * the processed data indicated that the connection has been or is to be 144 * closed. 145 * 146 * @return the result 147 */ 148 public boolean isClosed() { 149 return states.peek() == State.CLOSED; 150 } 151 152 /** 153 * Informs the derived class about the start of a new message. 154 * 155 * @param startLine 156 * the start line (first line) of the message 157 * @return the new HttpMessage object that is to hold the decoded data 158 * @throws HttpProtocolException if the input violates the HTTP 159 */ 160 protected abstract T newMessage(String startLine) 161 throws ProtocolException; 162 163 /** 164 * Informs the derived class that the header has been received completely. 165 * 166 * @param message the message 167 * @return indication how the body will be transferred 168 * @throws HttpProtocolException if the input violates the HTTP 169 */ 170 protected abstract BodyMode headerReceived(T message) 171 throws ProtocolException; 172 173 /** 174 * Informs the derived class that a complete message has been received 175 * and the given result will be returned. The derived class may take 176 * additional actions and even modify the result. The default 177 * implementation simply returns the given result. 178 */ 179 protected Decoder.Result<R> messageComplete(Decoder.Result<R> result) { 180 return result; 181 } 182 183 /** 184 * Decodes the next chunk of data. 185 * 186 * @param in 187 * holds the data to be decoded 188 * @param out 189 * gets the body data (if any) written to it 190 * @param endOfInput 191 * {@code true} if there is no input left beyond the data 192 * currently in the {@code in} buffer (indicates end of body or 193 * no body at all) 194 * @return the result 195 * @throws ProtocolException 196 * if the message violates the Protocol 197 */ 198 public Decoder.Result<R> decode(ByteBuffer in, Buffer out, 199 boolean endOfInput) throws ProtocolException { 200 try { 201 try { 202 return uncheckedDecode(in, out, endOfInput); 203 } catch (ParseException | NumberFormatException e) { 204 throw new HttpProtocolException(protocolVersion, 205 HttpStatus.BAD_REQUEST.statusCode(), e.getMessage()); 206 } 207 } catch (HttpProtocolException e) { 208 states.clear(); 209 states.push(State.CLOSED); 210 throw e; 211 } 212 } 213 214 private Decoder.Result<R> uncheckedDecode( 215 ByteBuffer in, Buffer out, boolean endOfInput) 216 throws ProtocolException, ParseException { 217 while (true) { 218 switch (states.peek()) { 219 // Waiting for CR (start of end of line) 220 case RECEIVE_LINE: { 221 if (!in.hasRemaining()) { 222 return resultFactory().newResult(false, true); 223 } 224 byte ch = in.get(); 225 if (ch == '\r') { 226 states.pop(); 227 states.push(State.AWAIT_LINE_END); 228 break; 229 } 230 lineBuilder.append(ch); 231 // RFC 7230 3.2.5 232 if (headerLength + lineBuilder.position() > maxHeaderLength) { 233 throw new HttpProtocolException(protocolVersion, 234 HttpStatus.BAD_REQUEST.statusCode(), 235 "Maximum header size exceeded"); 236 } 237 break; 238 } 239 // Waiting for LF (confirmation of end of line) 240 case AWAIT_LINE_END: { 241 if (!in.hasRemaining()) { 242 return resultFactory().newResult(false, true); 243 } 244 char ch = (char) in.get(); 245 if (ch == '\n') { 246 try { 247 // RFC 7230 3.2.4 248 receivedLine = new String(lineBuilder.array(), 0, 249 lineBuilder.position(), "iso-8859-1"); 250 } catch (UnsupportedEncodingException e) { 251 // iso-8859-1 is guaranteed to be supported 252 } 253 lineBuilder.clear(); 254 states.pop(); 255 break; 256 } 257 throw new HttpProtocolException(protocolVersion, 258 HttpStatus.BAD_REQUEST.statusCode(), 259 "CR not followed by LF"); 260 } 261 // Waiting for the initial request line 262 case AWAIT_MESSAGE_START: 263 if (receivedLine.isEmpty()) { 264 // Ignore as recommended by RFC2616/RFC7230 265 states.push(State.RECEIVE_LINE); 266 break; 267 } 268 building = newMessage(receivedLine); 269 messageHeader = null; 270 charDecoder = null; 271 states.pop(); 272 headerLine = null; 273 states.push(State.HEADER_LINE_RECEIVED); 274 states.push(State.RECEIVE_LINE); 275 break; 276 277 case HEADER_LINE_RECEIVED: 278 if (headerLine != null) { 279 // RFC 7230 3.2.4 280 if (!receivedLine.isEmpty() 281 && (receivedLine.charAt(0) == ' ' 282 || receivedLine.charAt(0) == '\t')) { 283 headerLine += (" " + receivedLine.substring(1)); 284 states.push(State.RECEIVE_LINE); 285 break; 286 } 287 // Header line complete, evaluate 288 newHeaderLine(); 289 } 290 if (receivedLine.isEmpty()) { 291 // Body starts 292 BodyMode bm = headerReceived(building); 293 adjustToBodyMode(bm); 294 messageHeader = building; 295 building = null; 296 if (!messageHeader.hasPayload()) { 297 return adjustToEndOfMessage(); 298 } 299 if (out == null) { 300 return resultFactory().newResult(true, false); 301 } 302 break; 303 } 304 headerLine = receivedLine; 305 states.push(State.RECEIVE_LINE); 306 break; 307 308 case LENGTH_RECEIVED: 309 // We "drop" to this state after COPY_SPECIFIED 310 // if we had a content length field 311 if (out instanceof CharBuffer && charDecoder != null) { 312 states.push(State.FINISH_CHARDECODER); 313 break; 314 } 315 states.pop(); 316 return adjustToEndOfMessage(); 317 318 case CHUNK_START_RECEIVED: 319 // We "drop" to this state when a line has been read 320 String sizeText = receivedLine.split(";")[0]; 321 long chunkSize = Long.parseLong(sizeText, 16); 322 if (chunkSize == 0) { 323 states.pop(); 324 states.push(State.CHUNK_TRAILER_LINE_RECEIVED); 325 states.push(State.RECEIVE_LINE); 326 if (out instanceof CharBuffer && charDecoder != null) { 327 states.push(State.FINISH_CHARDECODER); 328 } 329 break; 330 } 331 leftToRead = chunkSize; 332 // We expect the chunk data and the trailing CRLF (empty line) 333 // (which must be skipped). In reverse order: 334 states.push(State.CHUNK_END_RECEIVED); 335 states.push(State.RECEIVE_LINE); 336 states.push(State.COPY_SPECIFIED); 337 break; 338 339 case CHUNK_END_RECEIVED: 340 // We "drop" to this state when the CR/LF after chunk data 341 // has been read. There's nothing to do except to wait for 342 // next chunk 343 if (receivedLine.length() != 0) { 344 throw new HttpProtocolException(protocolVersion, 345 HttpStatus.BAD_REQUEST.statusCode(), 346 "No CRLF after chunk data."); 347 } 348 states.pop(); 349 states.push(State.CHUNK_START_RECEIVED); 350 states.push(State.RECEIVE_LINE); 351 break; 352 353 case CHUNK_TRAILER_LINE_RECEIVED: 354 // We "drop" to this state when a line has been read 355 if (!receivedLine.isEmpty()) { 356 headerLine = receivedLine; 357 newTrailerLine(); 358 states.push(State.RECEIVE_LINE); 359 break; 360 } 361 // All chunked data received 362 return adjustToEndOfMessage(); 363 364 case COPY_SPECIFIED: 365 // If we get here, leftToRead is greater zero. 366 int initiallyRemaining = in.remaining(); 367 if (out == null) { 368 return resultFactory().newResult(true, 369 initiallyRemaining <= 0); 370 } 371 CoderResult decRes; 372 if (in.remaining() <= leftToRead) { 373 decRes = copyBodyData(out, in, in.remaining(), endOfInput); 374 } else { 375 decRes = copyBodyData( 376 out, in, (int) leftToRead, endOfInput); 377 } 378 leftToRead -= (initiallyRemaining - in.remaining()); 379 if (leftToRead == 0) { 380 // Everything written (except, maybe, final bytes 381 // from decoder) 382 states.pop(); 383 break; 384 } 385 return resultFactory().newResult( 386 (!out.hasRemaining() && in.hasRemaining()) 387 || (decRes != null && decRes.isOverflow()), 388 !in.hasRemaining() 389 || (decRes != null && decRes.isUnderflow())); 390 391 case FINISH_CHARDECODER: 392 if (charDecoder.decode(EMPTY_IN, (CharBuffer) out, true) 393 .isOverflow()) { 394 return resultFactory().newResult(true, false); 395 } 396 states.pop(); 397 states.push(State.FLUSH_CHARDECODER); 398 break; 399 400 case FLUSH_CHARDECODER: 401 if (charDecoder.flush((CharBuffer) out).isOverflow()) { 402 return resultFactory().newResult(true, false); 403 } 404 // No longer needed (and no longer usable btw) 405 charDecoder = null; 406 states.pop(); 407 break; 408 409 case COPY_UNTIL_CLOSED: 410 if (out == null) { 411 return resultFactory().newResult(true, false); 412 } 413 decRes = copyBodyData(out, in, in.remaining(), endOfInput); 414 boolean overflow = (!out.hasRemaining() && in.hasRemaining()) 415 || (decRes != null && decRes.isOverflow()); 416 if (overflow) { 417 return resultFactory().newResult(true, false); 418 } 419 if (!endOfInput) { 420 return resultFactory().newResult(false, true); 421 } 422 // Final input successfully processed. 423 states.pop(); 424 states.push(State.CLOSED); 425 if (out instanceof CharBuffer && charDecoder != null) { 426 // Final flush needed 427 states.push(State.FINISH_CHARDECODER); 428 } 429 break; 430 431 case CLOSED: 432 in.position(in.limit()); 433 return resultFactory().newResult(false, false); 434 } 435 } 436 } 437 438 private void newHeaderLine() throws HttpProtocolException, ParseException { 439 headerLength += headerLine.length() + 2; 440 // RFC 7230 3.2 441 HttpField<?> field; 442 try { 443 field = new HttpField<>(headerLine, Converters.STRING); 444 } catch (ParseException e) { 445 throw new HttpProtocolException(protocolVersion, 446 HttpStatus.BAD_REQUEST.statusCode(), "Invalid header"); 447 } 448 if (field.name().equalsIgnoreCase(HttpField.SET_COOKIE)) { 449 field 450 = new HttpField<CookieList>(headerLine, Converters.SET_COOKIE); 451 } 452 switch (field.name()) { 453 case HttpField.CONTENT_LENGTH: 454 // RFC 7230 3.3.3 (3.) 455 if (building.fields() 456 .containsKey(HttpField.TRANSFER_ENCODING)) { 457 field = null; 458 break; 459 } 460 // RFC 7230 3.3.3 (4.) 461 Optional<HttpField<Long>> existing = building.findField( 462 HttpField.CONTENT_LENGTH, Converters.LONG); 463 if (existing.isPresent()) { 464 @SuppressWarnings("unchecked") 465 HttpField<Long> newLength = (HttpField<Long>) field; 466 if (!existing.get().value().equals(newLength.value())) { 467 throw new HttpProtocolException(protocolVersion, 468 HttpStatus.BAD_REQUEST); 469 } 470 } 471 break; 472 case HttpField.TRANSFER_ENCODING: 473 // RFC 7230 3.3.3 (3.) 474 building.removeField(HttpField.CONTENT_LENGTH); 475 break; 476 } 477 if (field == null) { 478 return; 479 } 480 addHeaderField(building, field); 481 } 482 483 private void newTrailerLine() throws HttpProtocolException, ParseException { 484 headerLength += headerLine.length() + 2; 485 // RFC 7230 3.2 486 HttpField<?> field; 487 try { 488 field = new HttpField<>(headerLine, Converters.STRING); 489 } catch (ParseException e) { 490 throw new HttpProtocolException(protocolVersion, 491 HttpStatus.BAD_REQUEST.statusCode(), "Invalid header"); 492 } 493 // RFC 7230 4.4 494 HttpField<StringList> trailerField = messageHeader 495 .computeIfAbsent(HttpField.TRAILER, Converters.STRING_LIST, 496 StringList::new); 497 if (!trailerField.value().containsIgnoreCase(field.name())) { 498 trailerField.value().add(field.name()); 499 } 500 addHeaderField(messageHeader, field); 501 } 502 503 private void addHeaderField(T header, HttpField<?> field) 504 throws HttpProtocolException, ParseException { 505 // RFC 7230 3.2.2 506 var exists = header.findField(field.name(), 507 HttpField.lookupConverter(field.name())); 508 if (exists.isPresent()) { 509 var existing = exists.get(); 510 // Duplicate field name is only allowed for value lists 511 if (!(existing.converter() instanceof MultiValueConverter)) { 512 throw new HttpProtocolException(protocolVersion, 513 HttpStatus.BAD_REQUEST.statusCode(), 514 "Multiple occurences of single value field " 515 + field.name()); 516 } 517 @SuppressWarnings("unchecked") 518 var converter = (MultiValueConverter<Iterable<Object>, 519 Object>) existing.converter(); 520 HttpField<?> srcField = field; 521 if (field.converter().equals(Converters.STRING)) { 522 // Still default (String), use real converter (same as existing) 523 var converted = new HttpField<>(field.name(), 524 converter.fromFieldValue((String) field.asFieldValue()), 525 converter); 526 srcField = converted; 527 } 528 var adder = converter.valueAdder(); 529 @SuppressWarnings("unchecked") 530 Iterable<Object> source = (Iterable<Object>) srcField.value(); 531 @SuppressWarnings("unchecked") 532 Iterable<Object> target = (Iterable<Object>) existing.value(); 533 source.forEach(item -> adder.accept(target, item)); 534 } else { 535 header.setField(field); 536 } 537 } 538 539 private void adjustToBodyMode(BodyMode bm) { 540 states.pop(); 541 switch (bm) { 542 case UNTIL_CLOSE: 543 states.push(State.COPY_UNTIL_CLOSED); 544 building.setHasPayload(true); 545 break; 546 case CHUNKED: 547 states.push(State.CHUNK_START_RECEIVED); 548 states.push(State.RECEIVE_LINE); 549 building.setHasPayload(true); 550 break; 551 case LENGTH: 552 HttpField<Long> clf = building.findField( 553 HttpField.CONTENT_LENGTH, Converters.LONG).get(); 554 leftToRead = clf.value(); 555 if (leftToRead > 0) { 556 states.push(State.LENGTH_RECEIVED); 557 states.push(State.COPY_SPECIFIED); 558 building.setHasPayload(true); 559 break; 560 } 561 // Length == 0 means no body, fall through 562 case NO_BODY: 563 building.setHasPayload(false); 564 break; 565 } 566 } 567 568 private CoderResult copyBodyData( 569 Buffer out, ByteBuffer in, int limit, boolean endOfInput) { 570 if (out instanceof ByteBuffer) { 571 ByteBufferUtils.putAsMuchAsPossible((ByteBuffer) out, in, limit); 572 return null; 573 } else if (out instanceof CharBuffer) { 574 if (charDecoder == null) { 575 charDecoder = new OptimizedCharsetDecoder( 576 Charset.forName(bodyCharset()).newDecoder()); 577 } 578 int oldLimit = in.limit(); 579 try { 580 if (in.remaining() > limit) { 581 in.limit(in.position() + limit); 582 } 583 return charDecoder.decode(in, (CharBuffer) out, endOfInput); 584 } finally { 585 in.limit(oldLimit); 586 } 587 } else { 588 throw new IllegalArgumentException( 589 "Only Byte- or CharBuffer are allowed."); 590 } 591 } 592 593 private Decoder.Result<R> adjustToEndOfMessage() { 594 // RFC 7230 6.3 595 Optional<HttpField<StringList>> connection = messageHeader 596 .findField(HttpField.CONNECTION, Converters.STRING_LIST); 597 if (connection.isPresent() && connection.get().value() 598 .stream().anyMatch(s -> s.equalsIgnoreCase("close"))) { 599 states.push(State.CLOSED); 600 return messageComplete(resultFactory().newResult(false, false)); 601 } 602 if (messageHeader.protocol().compareTo(HttpProtocol.HTTP_1_1) >= 0) { 603 states.push(State.AWAIT_MESSAGE_START); 604 states.push(State.RECEIVE_LINE); 605 return messageComplete(resultFactory().newResult(false, false)); 606 } 607 states.push(State.CLOSED); 608 return messageComplete(resultFactory().newResult(false, false)); 609 } 610 611 /** 612 * Results from {@link HttpDecoder} add no additional 613 * information to {@link org.jdrupes.httpcodec.Decoder.Result}. This 614 * class provides only a factory for creating 615 * the results as required by {@link HttpDecoder}. 616 * 617 * @param <R> the type of the response message header 618 */ 619 public static class Result<R extends MessageHeader> 620 extends Decoder.Result<R> { 621 622 public Result(boolean overflow, boolean underflow, 623 boolean closeConnection, boolean headerCompleted, R response, 624 boolean responseOnly) { 625 super(overflow, underflow, closeConnection, headerCompleted, 626 response, 627 responseOnly); 628 } 629 630 /** 631 * A factory for creating new Results. 632 */ 633 protected abstract static class Factory<R extends MessageHeader> 634 extends Decoder.Result.Factory<R> { 635 636 /** 637 * Create a new result. Implementing classes can 638 * obtain the value for 639 * {@link org.jdrupes.httpcodec.Codec.Result#closeConnection()} 640 * from {@link HttpDecoder#isClosed()}. 641 * 642 * @param overflow 643 * {@code true} if the data didn't fit in the out buffer 644 * @param underflow 645 * {@code true} if more data is expected 646 * @return the result 647 */ 648 protected abstract Result<R> newResult( 649 boolean overflow, boolean underflow); 650 } 651 } 652}