From f7a9549bd6ad83f2e0bceec9cddacfa5e3f84a54 Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Mon, 1 May 2006 17:11:04 +0000 Subject: libbu++ is finally laid out the way it should be, trunk, branches, and tags. --- Doxyfile | 275 + Makefile | 69 + misc/rfc2060-imap.txt | 4595 +++++++++++++ misc/rfc2616-http.txt | 9859 ++++++++++++++++++++++++++++ src/arraylist.cpp | 100 + src/arraylist.h | 80 + src/cgi.cpp | 644 ++ src/cgi.h | 196 + src/connection.cpp | 432 ++ src/connection.h | 387 ++ src/connectionmanager.cpp | 343 + src/connectionmanager.h | 138 + src/connectionmonitor.cpp | 23 + src/connectionmonitor.h | 41 + src/flexbuf.cpp | 206 + src/flexbuf.h | 160 + src/hashfunction.cpp | 10 + src/hashfunction.h | 48 + src/hashfunctioncasestring.cpp | 39 + src/hashfunctioncasestring.h | 28 + src/hashfunctionint.cpp | 20 + src/hashfunctionint.h | 26 + src/hashfunctionstring.cpp | 36 + src/hashfunctionstring.h | 27 + src/hashtable.cpp | 345 + src/hashtable.h | 299 + src/http.cpp | 371 ++ src/http.h | 271 + src/linkedlist.cpp | 227 + src/linkedlist.h | 87 + src/linkmessage.cpp | 53 + src/linkmessage.h | 39 + src/list.cpp | 27 + src/list.h | 101 + src/md5.cpp | 190 + src/md5.h | 81 + src/multilog.cpp | 143 + src/multilog.h | 145 + src/multilogchannel.cpp | 13 + src/multilogchannel.h | 46 + src/multilogtext.cpp | 152 + src/multilogtext.h | 70 + src/pproc.cpp | 60 + src/pproc.h | 35 + src/pqueue.cpp | 33 + src/pqueue.h | 48 + src/programchain.cpp | 113 + src/programchain.h | 88 + src/programlink.cpp | 71 + src/programlink.h | 99 + src/protocol.cpp | 31 + src/protocol.h | 58 + src/protocoltelnet.cpp | 315 + src/protocoltelnet.h | 77 + src/queue.cpp | 26 + src/queue.h | 45 + src/ringlist.cpp | 106 + src/ringlist.h | 112 + src/stack.cpp | 33 + src/stack.h | 50 + src/test/hashtest.cpp | 107 + src/test/httpsrv/httpconnectionmonitor.cpp | 72 + src/test/httpsrv/httpconnectionmonitor.h | 16 + src/test/httpsrv/main.cpp | 21 + src/test/md5test.cpp | 19 + src/test/teltest/main.cpp | 21 + 
src/test/teltest/telnetmonitor.cpp | 53 + src/test/teltest/telnetmonitor.h | 26 + src/test/xmlreadtest.cpp | 29 + src/test/xmlrepltest.cpp | 31 + src/test/xmlwritetest.cpp | 41 + src/tokenstring.cpp | 172 + src/tokenstring.h | 120 + src/xmldocument.cpp | 142 + src/xmldocument.h | 163 + src/xmlfilereader.cpp | 63 + src/xmlfilereader.h | 47 + src/xmlfilewriter.cpp | 22 + src/xmlfilewriter.h | 44 + src/xmlnode.cpp | 454 ++ src/xmlnode.h | 236 + src/xmlreader.cpp | 412 ++ src/xmlreader.h | 133 + src/xmlstringreader.cpp | 37 + src/xmlstringreader.h | 49 + src/xmlstringwriter.cpp | 23 + src/xmlstringwriter.h | 50 + src/xmlwriter.cpp | 173 + src/xmlwriter.h | 96 + 89 files changed, 24714 insertions(+) create mode 100644 Doxyfile create mode 100644 Makefile create mode 100644 misc/rfc2060-imap.txt create mode 100644 misc/rfc2616-http.txt create mode 100644 src/arraylist.cpp create mode 100644 src/arraylist.h create mode 100644 src/cgi.cpp create mode 100644 src/cgi.h create mode 100644 src/connection.cpp create mode 100644 src/connection.h create mode 100644 src/connectionmanager.cpp create mode 100644 src/connectionmanager.h create mode 100644 src/connectionmonitor.cpp create mode 100644 src/connectionmonitor.h create mode 100644 src/flexbuf.cpp create mode 100644 src/flexbuf.h create mode 100644 src/hashfunction.cpp create mode 100644 src/hashfunction.h create mode 100644 src/hashfunctioncasestring.cpp create mode 100644 src/hashfunctioncasestring.h create mode 100644 src/hashfunctionint.cpp create mode 100644 src/hashfunctionint.h create mode 100644 src/hashfunctionstring.cpp create mode 100644 src/hashfunctionstring.h create mode 100644 src/hashtable.cpp create mode 100644 src/hashtable.h create mode 100644 src/http.cpp create mode 100644 src/http.h create mode 100644 src/linkedlist.cpp create mode 100644 src/linkedlist.h create mode 100644 src/linkmessage.cpp create mode 100644 src/linkmessage.h create mode 100644 src/list.cpp create mode 100644 src/list.h create mode 
100644 src/md5.cpp create mode 100644 src/md5.h create mode 100644 src/multilog.cpp create mode 100644 src/multilog.h create mode 100644 src/multilogchannel.cpp create mode 100644 src/multilogchannel.h create mode 100644 src/multilogtext.cpp create mode 100644 src/multilogtext.h create mode 100644 src/pproc.cpp create mode 100644 src/pproc.h create mode 100644 src/pqueue.cpp create mode 100644 src/pqueue.h create mode 100644 src/programchain.cpp create mode 100644 src/programchain.h create mode 100644 src/programlink.cpp create mode 100644 src/programlink.h create mode 100644 src/protocol.cpp create mode 100644 src/protocol.h create mode 100644 src/protocoltelnet.cpp create mode 100644 src/protocoltelnet.h create mode 100644 src/queue.cpp create mode 100644 src/queue.h create mode 100644 src/ringlist.cpp create mode 100644 src/ringlist.h create mode 100644 src/stack.cpp create mode 100644 src/stack.h create mode 100644 src/test/hashtest.cpp create mode 100644 src/test/httpsrv/httpconnectionmonitor.cpp create mode 100644 src/test/httpsrv/httpconnectionmonitor.h create mode 100644 src/test/httpsrv/main.cpp create mode 100644 src/test/md5test.cpp create mode 100644 src/test/teltest/main.cpp create mode 100644 src/test/teltest/telnetmonitor.cpp create mode 100644 src/test/teltest/telnetmonitor.h create mode 100644 src/test/xmlreadtest.cpp create mode 100644 src/test/xmlrepltest.cpp create mode 100644 src/test/xmlwritetest.cpp create mode 100644 src/tokenstring.cpp create mode 100644 src/tokenstring.h create mode 100644 src/xmldocument.cpp create mode 100644 src/xmldocument.h create mode 100644 src/xmlfilereader.cpp create mode 100644 src/xmlfilereader.h create mode 100644 src/xmlfilewriter.cpp create mode 100644 src/xmlfilewriter.h create mode 100644 src/xmlnode.cpp create mode 100644 src/xmlnode.h create mode 100644 src/xmlreader.cpp create mode 100644 src/xmlreader.h create mode 100644 src/xmlstringreader.cpp create mode 100644 src/xmlstringreader.h create mode 
100644 src/xmlstringwriter.cpp create mode 100644 src/xmlstringwriter.h create mode 100644 src/xmlwriter.cpp create mode 100644 src/xmlwriter.h diff --git a/Doxyfile b/Doxyfile new file mode 100644 index 0000000..213c87b --- /dev/null +++ b/Doxyfile @@ -0,0 +1,275 @@ +# Doxyfile 1.4.1-KDevelop + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- +PROJECT_NAME = libbu++ +PROJECT_NUMBER = $VERSION$ +OUTPUT_DIRECTORY = api +CREATE_SUBDIRS = NO +OUTPUT_LANGUAGE = English +USE_WINDOWS_ENCODING = NO +BRIEF_MEMBER_DESC = YES +REPEAT_BRIEF = YES +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the +ALWAYS_DETAILED_SEC = NO +INLINE_INHERITED_MEMB = NO +FULL_PATH_NAMES = NO +STRIP_FROM_PATH = /home/eichlan/projects/libbu++/ +STRIP_FROM_INC_PATH = +SHORT_NAMES = NO +JAVADOC_AUTOBRIEF = YES +MULTILINE_CPP_IS_BRIEF = NO +DETAILS_AT_TOP = NO +INHERIT_DOCS = YES +DISTRIBUTE_GROUP_DOC = NO +TAB_SIZE = 4 +ALIASES = +OPTIMIZE_OUTPUT_FOR_C = NO +OPTIMIZE_OUTPUT_JAVA = NO +SUBGROUPING = YES +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- +EXTRACT_ALL = YES +EXTRACT_PRIVATE = YES +EXTRACT_STATIC = YES +EXTRACT_LOCAL_CLASSES = YES +EXTRACT_LOCAL_METHODS = YES +HIDE_UNDOC_MEMBERS = NO +HIDE_UNDOC_CLASSES = NO +HIDE_FRIEND_COMPOUNDS = NO +HIDE_IN_BODY_DOCS = NO +INTERNAL_DOCS = YES +CASE_SENSE_NAMES = YES +HIDE_SCOPE_NAMES = NO +SHOW_INCLUDE_FILES = YES +INLINE_INFO = YES +SORT_MEMBER_DOCS = YES +SORT_BRIEF_DOCS = NO +SORT_BY_SCOPE_NAME = NO +GENERATE_TODOLIST = YES +GENERATE_TESTLIST = YES +GENERATE_BUGLIST = YES +GENERATE_DEPRECATEDLIST= YES +ENABLED_SECTIONS = +MAX_INITIALIZER_LINES = 30 
+SHOW_USED_FILES = YES +SHOW_DIRECTORIES = YES +FILE_VERSION_FILTER = +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- +QUIET = NO +WARNINGS = YES +WARN_IF_UNDOCUMENTED = YES +WARN_IF_DOC_ERROR = YES +WARN_NO_PARAMDOC = NO +WARN_FORMAT = "$file:$line: $text" +WARN_LOGFILE = +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- +INPUT = . +FILE_PATTERNS = *.c \ + *.cc \ + *.cxx \ + *.cpp \ + *.c++ \ + *.java \ + *.ii \ + *.ixx \ + *.ipp \ + *.i++ \ + *.inl \ + *.h \ + *.hh \ + *.hxx \ + *.hpp \ + *.h++ \ + *.idl \ + *.odl \ + *.cs \ + *.php \ + *.php3 \ + *.inc \ + *.m \ + *.mm \ + *.dox \ + *.C \ + *.CC \ + *.C++ \ + *.II \ + *.I++ \ + *.H \ + *.HH \ + *.H++ \ + *.CS \ + *.PHP \ + *.PHP3 \ + *.M \ + *.MM \ + *.C \ + *.H \ + *.tlh \ + *.diff \ + *.patch \ + *.moc \ + *.xpm \ + *.dox +RECURSIVE = YES +EXCLUDE = +EXCLUDE_SYMLINKS = NO +EXCLUDE_PATTERNS = +EXAMPLE_PATH = +EXAMPLE_PATTERNS = * +EXAMPLE_RECURSIVE = NO +IMAGE_PATH = +INPUT_FILTER = +FILTER_PATTERNS = +FILTER_SOURCE_FILES = NO +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- +SOURCE_BROWSER = YES +INLINE_SOURCES = NO +STRIP_CODE_COMMENTS = YES +REFERENCED_BY_RELATION = YES +REFERENCES_RELATION = YES +VERBATIM_HEADERS = YES +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- +ALPHABETICAL_INDEX = YES +COLS_IN_ALPHA_INDEX = 5 +IGNORE_PREFIX = 
+#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- +GENERATE_HTML = YES +HTML_OUTPUT = html +HTML_FILE_EXTENSION = .html +HTML_HEADER = +HTML_FOOTER = +HTML_STYLESHEET = +HTML_ALIGN_MEMBERS = YES +GENERATE_HTMLHELP = NO +CHM_FILE = +HHC_LOCATION = +GENERATE_CHI = NO +BINARY_TOC = NO +TOC_EXPAND = NO +DISABLE_INDEX = NO +ENUM_VALUES_PER_LINE = 4 +GENERATE_TREEVIEW = NO +TREEVIEW_WIDTH = 250 +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- +GENERATE_LATEX = YES +LATEX_OUTPUT = latex +LATEX_CMD_NAME = latex +MAKEINDEX_CMD_NAME = makeindex +COMPACT_LATEX = NO +PAPER_TYPE = letter +EXTRA_PACKAGES = +LATEX_HEADER = +PDF_HYPERLINKS = YES +USE_PDFLATEX = YES +LATEX_BATCHMODE = NO +LATEX_HIDE_INDICES = NO +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- +GENERATE_RTF = NO +RTF_OUTPUT = rtf +COMPACT_RTF = NO +RTF_HYPERLINKS = YES +RTF_STYLESHEET_FILE = +RTF_EXTENSIONS_FILE = +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- +GENERATE_MAN = NO +MAN_OUTPUT = man +MAN_EXTENSION = .3 +MAN_LINKS = NO +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- +GENERATE_XML = NO +XML_OUTPUT = xml +XML_SCHEMA = +XML_DTD = +XML_PROGRAMLISTING = YES 
+#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- +GENERATE_AUTOGEN_DEF = NO +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- +GENERATE_PERLMOD = NO +PERLMOD_LATEX = YES +PERLMOD_PRETTY = YES +PERLMOD_MAKEVAR_PREFIX = +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION = YES +EXPAND_ONLY_PREDEF = NO +SEARCH_INCLUDES = YES +INCLUDE_PATH = +INCLUDE_FILE_PATTERNS = +PREDEFINED = +EXPAND_AS_DEFINED = +SKIP_FUNCTION_MACROS = YES +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- +TAGFILES = +GENERATE_TAGFILE = +ALLEXTERNALS = NO +EXTERNAL_GROUPS = YES +PERL_PATH = /usr/bin/perl +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- +CLASS_DIAGRAMS = YES +HIDE_UNDOC_RELATIONS = YES +HAVE_DOT = YES +CLASS_GRAPH = YES +COLLABORATION_GRAPH = YES +GROUP_GRAPHS = YES +UML_LOOK = NO +TEMPLATE_RELATIONS = NO +INCLUDE_GRAPH = YES +INCLUDED_BY_GRAPH = YES +CALL_GRAPH = NO +GRAPHICAL_HIERARCHY = YES +DIRECTORY_GRAPH = YES +DOT_IMAGE_FORMAT = png +DOT_PATH = +DOTFILE_DIRS = +MAX_DOT_GRAPH_WIDTH = 1024 +MAX_DOT_GRAPH_HEIGHT = 1024 +MAX_DOT_GRAPH_DEPTH = 1000 +DOT_TRANSPARENT = NO +DOT_MULTI_TARGETS = NO +GENERATE_LEGEND = YES +DOT_CLEANUP = YES 
+#--------------------------------------------------------------------------- +# Configuration::additions related to the search engine +#--------------------------------------------------------------------------- +SEARCHENGINE = NO diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..b3d213d --- /dev/null +++ b/Makefile @@ -0,0 +1,69 @@ +OBJS:=$(patsubst %.cpp,%.o,$(wildcard src/*.cpp)) +TOBJS:=$(patsubst %.cpp,%.o,$(wildcard src/test/*.cpp)) +TDIRS:=$(filter-out %.o %.cpp %.h %.d,$(wildcard src/test/*)) +TESTS:=$(patsubst src/test/%.o,%,$(TOBJS)) $(patsubst src/test/%,%,$(TDIRS)) +ATOBJS:=$(TOBJS) $(patsubst %.cpp,%.o,$(foreach dr,$(TDIRS),$(wildcard $(dr)/*.cpp))) +LIB:=libbu++.a +DATE:=$(shell date +%F) + +TXTCPP:="\ g++:\ \ \ " +TXTLNK:="\ \ ld:\ \ \ " +TXTDEP:="\ dep:\ \ \ " +TXTARC:="\ \ ar:\ \ \ " + +.PHONY: all clean dist tests depclean cleanapi +.SILENT: clean $(OBJS) $(TOBJS) $(ATOBJS) $(TESTS) $(patsubst %.o,%.d,$(OBJS) $(TOBJS) $(ATOBJS) $(TESTS)) $(LIB) + +all: libbu++.a tests + +depclean: + -rm $(patsubst %.o,%.d,$(OBJS) $(ATOBJS)) + +-include $(patsubst %.o,%.d,$(OBJS) $(ATOBJS)) + +clean: + -rm $(OBJS) $(ATOBJS) $(TESTS) $(LIB) + +# This bit I cribbed from the docs, seems to work great though! +%.d: %.cpp + g++ $(CXXFLAGS) -Isrc -M $(CPPFLAGS) $< | sed 's,\($(notdir $*)\)\.o[: ]*,$(dir $*)\1.o $@: ,g' > $@ + echo "$(TXTDEP)$@" + +%.o: %.cpp + g++ $(CXXFLAGS) -Isrc $(foreach dr,$(filter $(dir $@),$(foreach ddr,$(TDIRS),$(ddr)/)),-I$(dr)) -ggdb -c -o $@ $< + echo "$(TXTCPP)$@" + +$(LIB): $(OBJS) + ar cr $(LIB) $(OBJS) + echo "$(TXTARC)$@" + +$(TESTS): $(ATOBJS) $(LIB) + g++ $(LDFLAGS) -ggdb $(filter %$@.o, $(TOBJS) ) $(patsubst %.cpp,%.o,$(wildcard $(filter %$@, $(TDIRS))/*.cpp)) -L. 
-lbu++ -o $@ + echo "$(TXTLNK)$@" + +tests: $(TESTS) + +dist: clean depclean + mkdir libbu++-$(DATE) + cp -a --target-directory=libbu++-$(DATE) Makefile src + tar --exclude=\.svn -c libbu++-$(DATE) | bzip2 -9 > libbu++-$(DATE).tar.bz2 + rm -Rf libbu++-$(DATE) + +install: libbu++.a + cat src/*.h | grep -v "#include \"" - > libbu++.h + cp -a libbu++.a $(PREFIX)/lib + cp -a libbu++.h $(PREFIX)/include + +cleanapi: + -rm -Rf api + +api: $(SRC) + -rm -Rf api + doxygen + make -C api/latex + +#tests: $(TOBJS) $(LIB) +# for file in $(patsubst src/test/%.o,%,$(TOBJS)); \ +# do g++ -o $$file src/test/$$file.o -L. -lbu++; \ +# done + diff --git a/misc/rfc2060-imap.txt b/misc/rfc2060-imap.txt new file mode 100644 index 0000000..cf46159 --- /dev/null +++ b/misc/rfc2060-imap.txt @@ -0,0 +1,4595 @@ + + + + + + +Network Working Group M. Crispin +Request for Comments: 2060 University of Washington +Obsoletes: 1730 December 1996 +Category: Standards Track + + + INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1 + +Status of this Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Abstract + + The Internet Message Access Protocol, Version 4rev1 (IMAP4rev1) + allows a client to access and manipulate electronic mail messages on + a server. IMAP4rev1 permits manipulation of remote message folders, + called "mailboxes", in a way that is functionally equivalent to local + mailboxes. IMAP4rev1 also provides the capability for an offline + client to resynchronize with the server (see also [IMAP-DISC]). 
+ + IMAP4rev1 includes operations for creating, deleting, and renaming + mailboxes; checking for new messages; permanently removing messages; + setting and clearing flags; [RFC-822] and [MIME-IMB] parsing; + searching; and selective fetching of message attributes, texts, and + portions thereof. Messages in IMAP4rev1 are accessed by the use of + numbers. These numbers are either message sequence numbers or unique + identifiers. + + IMAP4rev1 supports a single server. A mechanism for accessing + configuration information to support multiple IMAP4rev1 servers is + discussed in [ACAP]. + + IMAP4rev1 does not specify a means of posting mail; this function is + handled by a mail transfer protocol such as [SMTP]. + + IMAP4rev1 is designed to be upwards compatible from the [IMAP2] and + unpublished IMAP2bis protocols. In the course of the evolution of + IMAP4rev1, some aspects in the earlier protocol have become obsolete. + Obsolete commands, responses, and data formats which an IMAP4rev1 + implementation may encounter when used with an earlier implementation + are described in [IMAP-OBSOLETE]. + + + + + +Crispin Standards Track [Page 1] + +RFC 2060 IMAP4rev1 December 1996 + + + Other compatibility issues with IMAP2bis, the most common variant of + the earlier protocol, are discussed in [IMAP-COMPAT]. A full + discussion of compatibility issues with rare (and presumed extinct) + variants of [IMAP2] is in [IMAP-HISTORICAL]; this document is + primarily of historical interest. + +Table of Contents + +IMAP4rev1 Protocol Specification .................................. 4 +1. How to Read This Document ................................. 4 +1.1. Organization of This Document ............................. 4 +1.2. Conventions Used in This Document ......................... 4 +2. Protocol Overview ......................................... 5 +2.1. Link Level ................................................ 5 +2.2. Commands and Responses .................................... 6 +2.2.1. 
Client Protocol Sender and Server Protocol Receiver ....... 6 +2.2.2. Server Protocol Sender and Client Protocol Receiver ....... 7 +2.3. Message Attributes ........................................ 7 +2.3.1. Message Numbers ........................................... 7 +2.3.1.1. Unique Identifier (UID) Message Attribute ......... 7 +2.3.1.2. Message Sequence Number Message Attribute ......... 9 +2.3.2. Flags Message Attribute .................................... 9 +2.3.3. Internal Date Message Attribute ........................... 10 +2.3.4. [RFC-822] Size Message Attribute .......................... 11 +2.3.5. Envelope Structure Message Attribute ...................... 11 +2.3.6. Body Structure Message Attribute .......................... 11 +2.4. Message Texts ............................................. 11 +3. State and Flow Diagram .................................... 11 +3.1. Non-Authenticated State ................................... 11 +3.2. Authenticated State ....................................... 11 +3.3. Selected State ............................................ 12 +3.4. Logout State .............................................. 12 +4. Data Formats .............................................. 12 +4.1. Atom ...................................................... 13 +4.2. Number .................................................... 13 +4.3. String ..................................................... 13 +4.3.1. 8-bit and Binary Strings .................................. 13 +4.4. Parenthesized List ........................................ 14 +4.5. NIL ....................................................... 14 +5. Operational Considerations ................................ 14 +5.1. Mailbox Naming ............................................ 14 +5.1.1. Mailbox Hierarchy Naming .................................. 14 +5.1.2. Mailbox Namespace Naming Convention ....................... 14 +5.1.3. Mailbox International Naming Convention ................... 
15 +5.2. Mailbox Size and Message Status Updates ................... 16 +5.3. Response when no Command in Progress ...................... 16 +5.4. Autologout Timer .......................................... 16 +5.5. Multiple Commands in Progress ............................. 17 + + + +Crispin Standards Track [Page 2] + +RFC 2060 IMAP4rev1 December 1996 + + +6. Client Commands ........................................... 17 +6.1. Client Commands - Any State ............................... 18 +6.1.1. CAPABILITY Command ........................................ 18 +6.1.2. NOOP Command .............................................. 19 +6.1.3. LOGOUT Command ............................................ 20 +6.2. Client Commands - Non-Authenticated State ................. 20 +6.2.1. AUTHENTICATE Command ...................................... 21 +6.2.2. LOGIN Command ............................................. 22 +6.3. Client Commands - Authenticated State ..................... 22 +6.3.1. SELECT Command ............................................ 23 +6.3.2. EXAMINE Command ........................................... 24 +6.3.3. CREATE Command ............................................ 25 +6.3.4. DELETE Command ............................................ 26 +6.3.5. RENAME Command ............................................ 27 +6.3.6. SUBSCRIBE Command ......................................... 29 +6.3.7. UNSUBSCRIBE Command ....................................... 30 +6.3.8. LIST Command .............................................. 30 +6.3.9. LSUB Command .............................................. 32 +6.3.10. STATUS Command ............................................ 33 +6.3.11. APPEND Command ............................................ 34 +6.4. Client Commands - Selected State .......................... 35 +6.4.1. CHECK Command ............................................. 36 +6.4.2. CLOSE Command ............................................. 36 +6.4.3. 
EXPUNGE Command ........................................... 37 +6.4.4. SEARCH Command ............................................ 37 +6.4.5. FETCH Command ............................................. 41 +6.4.6. STORE Command ............................................. 45 +6.4.7. COPY Command .............................................. 46 +6.4.8. UID Command ............................................... 47 +6.5. Client Commands - Experimental/Expansion .................. 48 +6.5.1. X Command ........................................... 48 +7. Server Responses .......................................... 48 +7.1. Server Responses - Status Responses ....................... 49 +7.1.1. OK Response ............................................... 51 +7.1.2. NO Response ............................................... 51 +7.1.3. BAD Response .............................................. 52 +7.1.4. PREAUTH Response .......................................... 52 +7.1.5. BYE Response .............................................. 52 +7.2. Server Responses - Server and Mailbox Status .............. 53 +7.2.1. CAPABILITY Response ....................................... 53 +7.2.2. LIST Response .............................................. 54 +7.2.3. LSUB Response ............................................. 55 +7.2.4 STATUS Response ........................................... 55 +7.2.5. SEARCH Response ........................................... 55 +7.2.6. FLAGS Response ............................................ 56 +7.3. Server Responses - Mailbox Size ........................... 56 +7.3.1. EXISTS Response ........................................... 56 +7.3.2. RECENT Response ........................................... 57 + + + +Crispin Standards Track [Page 3] + +RFC 2060 IMAP4rev1 December 1996 + + +7.4. Server Responses - Message Status ......................... 57 +7.4.1. EXPUNGE Response .......................................... 57 +7.4.2. 
FETCH Response ............................................ 58 +7.5. Server Responses - Command Continuation Request ........... 63 +8. Sample IMAP4rev1 connection ............................... 63 +9. Formal Syntax ............................................. 64 +10. Author's Note ............................................. 74 +11. Security Considerations ................................... 74 +12. Author's Address .......................................... 75 +Appendices ........................................................ 76 +A. References ................................................ 76 +B. Changes from RFC 1730 ..................................... 77 +C. Key Word Index ............................................ 79 + + +IMAP4rev1 Protocol Specification + +1. How to Read This Document + +1.1. Organization of This Document + + This document is written from the point of view of the implementor of + an IMAP4rev1 client or server. Beyond the protocol overview in + section 2, it is not optimized for someone trying to understand the + operation of the protocol. The material in sections 3 through 5 + provides the general context and definitions with which IMAP4rev1 + operates. + + Sections 6, 7, and 9 describe the IMAP commands, responses, and + syntax, respectively. The relationships among these are such that it + is almost impossible to understand any of them separately. In + particular, do not attempt to deduce command syntax from the command + section alone; instead refer to the Formal Syntax section. + +1.2. Conventions Used in This Document + + In examples, "C:" and "S:" indicate lines sent by the client and + server respectively. + + The following terms are used in this document to signify the + requirements of this specification. + + 1) MUST, or the adjective REQUIRED, means that the definition is + an absolute requirement of the specification. + + 2) MUST NOT that the definition is an absolute prohibition of the + specification. 
+ + + + +Crispin Standards Track [Page 4] + +RFC 2060 IMAP4rev1 December 1996 + + + 3) SHOULD means that there may exist valid reasons in particular + circumstances to ignore a particular item, but the full + implications MUST be understood and carefully weighed before + choosing a different course. + + 4) SHOULD NOT means that there may exist valid reasons in + particular circumstances when the particular behavior is + acceptable or even useful, but the full implications SHOULD be + understood and the case carefully weighed before implementing + any behavior described with this label. + + 5) MAY, or the adjective OPTIONAL, means that an item is truly + optional. One vendor may choose to include the item because a + particular marketplace requires it or because the vendor feels + that it enhances the product while another vendor may omit the + same item. An implementation which does not include a + particular option MUST be prepared to interoperate with another + implementation which does include the option. + + "Can" is used instead of "may" when referring to a possible + circumstance or situation, as opposed to an optional facility of + the protocol. + + "User" is used to refer to a human user, whereas "client" refers + to the software being run by the user. + + "Connection" refers to the entire sequence of client/server + interaction from the initial establishment of the network + connection until its termination. "Session" refers to the + sequence of client/server interaction from the time that a mailbox + is selected (SELECT or EXAMINE command) until the time that + selection ends (SELECT or EXAMINE of another mailbox, CLOSE + command, or connection termination). + + Characters are 7-bit US-ASCII unless otherwise specified. Other + character sets are indicated using a "CHARSET", as described in + [MIME-IMT] and defined in [CHARSET]. CHARSETs have important + additional semantics in addition to defining character set; refer + to these documents for more detail. 
+ +2. Protocol Overview + +2.1. Link Level + + The IMAP4rev1 protocol assumes a reliable data stream such as + provided by TCP. When TCP is used, an IMAP4rev1 server listens on + port 143. + + + + +Crispin Standards Track [Page 5] + +RFC 2060 IMAP4rev1 December 1996 + + +2.2. Commands and Responses + + An IMAP4rev1 connection consists of the establishment of a + client/server network connection, an initial greeting from the + server, and client/server interactions. These client/server + interactions consist of a client command, server data, and a server + completion result response. + + All interactions transmitted by client and server are in the form of + lines; that is, strings that end with a CRLF. The protocol receiver + of an IMAP4rev1 client or server is either reading a line, or is + reading a sequence of octets with a known count followed by a line. + +2.2.1. Client Protocol Sender and Server Protocol Receiver + + The client command begins an operation. Each client command is + prefixed with an identifier (typically a short alphanumeric string, + e.g. A0001, A0002, etc.) called a "tag". A different tag is + generated by the client for each command. + + There are two cases in which a line from the client does not + represent a complete command. In one case, a command argument is + quoted with an octet count (see the description of literal in String + under Data Formats); in the other case, the command arguments require + server feedback (see the AUTHENTICATE command). In either case, the + server sends a command continuation request response if it is ready + for the octets (if appropriate) and the remainder of the command. + This response is prefixed with the token "+". + + Note: If, instead, the server detected an error in the command, it + sends a BAD completion response with tag matching the command (as + described below) to reject the command and prevent the client from + sending any more of the command. 
+ + It is also possible for the server to send a completion response + for some other command (if multiple commands are in progress), or + untagged data. In either case, the command continuation request + is still pending; the client takes the appropriate action for the + response, and reads another response from the server. In all + cases, the client MUST send a complete command (including + receiving all command continuation request responses and command + continuations for the command) before initiating a new command. + + The protocol receiver of an IMAP4rev1 server reads a command line + from the client, parses the command and its arguments, and transmits + server data and a server command completion result response. + + + + + +Crispin Standards Track [Page 6] + +RFC 2060 IMAP4rev1 December 1996 + + +2.2.2. Server Protocol Sender and Client Protocol Receiver + + Data transmitted by the server to the client and status responses + that do not indicate command completion are prefixed with the token + "*", and are called untagged responses. + + Server data MAY be sent as a result of a client command, or MAY be + sent unilaterally by the server. There is no syntactic difference + between server data that resulted from a specific command and server + data that were sent unilaterally. + + The server completion result response indicates the success or + failure of the operation. It is tagged with the same tag as the + client command which began the operation. Thus, if more than one + command is in progress, the tag in a server completion response + identifies the command to which the response applies. There are + three possible server completion responses: OK (indicating success), + NO (indicating failure), or BAD (indicating protocol error such as + unrecognized command or command syntax error). + + The protocol receiver of an IMAP4rev1 client reads a response line + from the server. 
It then takes action on the response based upon the + first token of the response, which can be a tag, a "*", or a "+". + + A client MUST be prepared to accept any server response at all times. + This includes server data that was not requested. Server data SHOULD + be recorded, so that the client can reference its recorded copy + rather than sending a command to the server to request the data. In + the case of certain server data, the data MUST be recorded. + + This topic is discussed in greater detail in the Server Responses + section. + +2.3. Message Attributes + + In addition to message text, each message has several attributes + associated with it. These attributes may be retrieved individually + or in conjunction with other attributes or message texts. + +2.3.1. Message Numbers + + Messages in IMAP4rev1 are accessed by one of two numbers; the unique + identifier and the message sequence number. + +2.3.1.1. Unique Identifier (UID) Message Attribute + + A 32-bit value assigned to each message, which when used with the + unique identifier validity value (see below) forms a 64-bit value + + + +Crispin Standards Track [Page 7] + +RFC 2060 IMAP4rev1 December 1996 + + + that is permanently guaranteed not to refer to any other message in + the mailbox. Unique identifiers are assigned in a strictly ascending + fashion in the mailbox; as each message is added to the mailbox it is + assigned a higher UID than the message(s) which were added + previously. + + Unlike message sequence numbers, unique identifiers are not + necessarily contiguous. Unique identifiers also persist across + sessions. This permits a client to resynchronize its state from a + previous session with the server (e.g. disconnected or offline access + clients); this is discussed further in [IMAP-DISC]. + + Associated with every mailbox is a unique identifier validity value, + which is sent in an UIDVALIDITY response code in an OK untagged + response at mailbox selection time. 
If unique identifiers from an + earlier session fail to persist to this session, the unique + identifier validity value MUST be greater than the one used in the + earlier session. + + Note: Unique identifiers MUST be strictly ascending in the mailbox + at all times. If the physical message store is re-ordered by a + non-IMAP agent, this requires that the unique identifiers in the + mailbox be regenerated, since the former unique identifiers are no + longer strictly ascending as a result of the re-ordering. Another + instance in which unique identifiers are regenerated is if the + message store has no mechanism to store unique identifiers. + Although this specification recognizes that this may be + unavoidable in certain server environments, it STRONGLY ENCOURAGES + message store implementation techniques that avoid this problem. + + Another cause of non-persistence is if the mailbox is deleted and + a new mailbox with the same name is created at a later date. Since + the name is the same, a client may not know that this is a new + mailbox unless the unique identifier validity is different. A + good value to use for the unique identifier validity value is a + 32-bit representation of the creation date/time of the mailbox. + It is alright to use a constant such as 1, but only if it is + guaranteed that unique identifiers will never be reused, even in + the case of a mailbox being deleted (or renamed) and a new mailbox + by the same name created at some future time. + + The unique identifier of a message MUST NOT change during the + session, and SHOULD NOT change between sessions. However, if it is + not possible to preserve the unique identifier of a message in a + subsequent session, each subsequent session MUST have a new unique + identifier validity value that is larger than any that was used + previously. + + + + +Crispin Standards Track [Page 8] + +RFC 2060 IMAP4rev1 December 1996 + + +2.3.1.2.
Message Sequence Number Message Attribute + + A relative position from 1 to the number of messages in the mailbox. + This position MUST be ordered by ascending unique identifier. As + each new message is added, it is assigned a message sequence number + that is 1 higher than the number of messages in the mailbox before + that new message was added. + + Message sequence numbers can be reassigned during the session. For + example, when a message is permanently removed (expunged) from the + mailbox, the message sequence number for all subsequent messages is + decremented. Similarly, a new message can be assigned a message + sequence number that was once held by some other message prior to an + expunge. + + In addition to accessing messages by relative position in the + mailbox, message sequence numbers can be used in mathematical + calculations. For example, if an untagged "11 EXISTS" is received, + and previously an untagged "8 EXISTS" was received, three new + messages have arrived with message sequence numbers of 9, 10, and 11. + Another example; if message 287 in a 523 message mailbox has UID + 12345, there are exactly 286 messages which have lesser UIDs and 236 + messages which have greater UIDs. + +2.3.2. Flags Message Attribute + + A list of zero or more named tokens associated with the message. A + flag is set by its addition to this list, and is cleared by its + removal. There are two types of flags in IMAP4rev1. A flag of + either type may be permanent or session-only. + + A system flag is a flag name that is pre-defined in this + specification. All system flags begin with "\". Certain system + flags (\Deleted and \Seen) have special semantics described + elsewhere.
The currently-defined system flags are: + + \Seen Message has been read + + \Answered Message has been answered + + \Flagged Message is "flagged" for urgent/special attention + + \Deleted Message is "deleted" for removal by later EXPUNGE + + \Draft Message has not completed composition (marked as a + draft). + + + + + +Crispin Standards Track [Page 9] + +RFC 2060 IMAP4rev1 December 1996 + + + \Recent Message is "recently" arrived in this mailbox. This + session is the first session to have been notified + about this message; subsequent sessions will not see + \Recent set for this message. This flag can not be + altered by the client. + + If it is not possible to determine whether or not + this session is the first session to be notified + about a message, then that message SHOULD be + considered recent. + + If multiple connections have the same mailbox + selected simultaneously, it is undefined which of + these connections will see newly-arrived messages + with \Recent set and which will see it without + \Recent set. + + A keyword is defined by the server implementation. Keywords do + not begin with "\". Servers MAY permit the client to define new + keywords in the mailbox (see the description of the + PERMANENTFLAGS response code for more information). + + A flag may be permanent or session-only on a per-flag basis. + Permanent flags are those which the client can add or remove + from the message flags permanently; that is, subsequent sessions + will see any change in permanent flags. Changes to session + flags are valid only in that session. + + Note: The \Recent system flag is a special case of a + session flag. \Recent can not be used as an argument in a + STORE command, and thus can not be changed at all. + +2.3.3. Internal Date Message Attribute + + The internal date and time of the message on the server. This is not + the date and time in the [RFC-822] header, but rather a date and time + which reflects when the message was received.
In the case of + messages delivered via [SMTP], this SHOULD be the date and time of + final delivery of the message as defined by [SMTP]. In the case of + messages delivered by the IMAP4rev1 COPY command, this SHOULD be the + internal date and time of the source message. In the case of + messages delivered by the IMAP4rev1 APPEND command, this SHOULD be + the date and time as specified in the APPEND command description. + All other cases are implementation defined. + + + + + + + +Crispin Standards Track [Page 10] + +RFC 2060 IMAP4rev1 December 1996 + + +2.3.4. [RFC-822] Size Message Attribute + + The number of octets in the message, as expressed in [RFC-822] + format. + +2.3.5. Envelope Structure Message Attribute + + A parsed representation of the [RFC-822] envelope information (not to + be confused with an [SMTP] envelope) of the message. + +2.3.6. Body Structure Message Attribute + + A parsed representation of the [MIME-IMB] body structure information + of the message. + +2.4. Message Texts + + In addition to being able to fetch the full [RFC-822] text of a + message, IMAP4rev1 permits the fetching of portions of the full + message text. Specifically, it is possible to fetch the [RFC-822] + message header, [RFC-822] message body, a [MIME-IMB] body part, or a + [MIME-IMB] header. + +3. State and Flow Diagram + + An IMAP4rev1 server is in one of four states. Most commands are + valid in only certain states. It is a protocol error for the client + to attempt a command while the command is in an inappropriate state. + In this case, a server will respond with a BAD or NO (depending upon + server implementation) command completion result. + +3.1. Non-Authenticated State + + In non-authenticated state, the client MUST supply authentication + credentials before most commands will be permitted. This state is + entered when a connection starts unless the connection has been pre- + authenticated. + +3.2. 
Authenticated State + + In authenticated state, the client is authenticated and MUST select a + mailbox to access before commands that affect messages will be + permitted. This state is entered when a pre-authenticated connection + starts, when acceptable authentication credentials have been + provided, or after an error in selecting a mailbox. + + + + + + +Crispin Standards Track [Page 11] + +RFC 2060 IMAP4rev1 December 1996 + + +3.3. Selected State + + In selected state, a mailbox has been selected to access. This state + is entered when a mailbox has been successfully selected. + +3.4. Logout State + + In logout state, the connection is being terminated, and the server + will close the connection. This state can be entered as a result of + a client request or by unilateral server decision. + + +--------------------------------------+ + |initial connection and server greeting| + +--------------------------------------+ + || (1) || (2) || (3) + VV || || + +-----------------+ || || + |non-authenticated| || || + +-----------------+ || || + || (7) || (4) || || + || VV VV || + || +----------------+ || + || | authenticated |<=++ || + || +----------------+ || || + || || (7) || (5) || (6) || + || || VV || || + || || +--------+ || || + || || |selected|==++ || + || || +--------+ || + || || || (7) || + VV VV VV VV + +--------------------------------------+ + | logout and close connection | + +--------------------------------------+ + + (1) connection without pre-authentication (OK greeting) + (2) pre-authenticated connection (PREAUTH greeting) + (3) rejected connection (BYE greeting) + (4) successful LOGIN or AUTHENTICATE command + (5) successful SELECT or EXAMINE command + (6) CLOSE command, or failed SELECT or EXAMINE command + (7) LOGOUT command, server shutdown, or connection closed + +4. Data Formats + + IMAP4rev1 uses textual commands and responses. Data in IMAP4rev1 can + be in one of several forms: atom, number, string, parenthesized list, + or NIL. 
+ + + +Crispin Standards Track [Page 12] + +RFC 2060 IMAP4rev1 December 1996 + + +4.1. Atom + + An atom consists of one or more non-special characters. + +4.2. Number + + A number consists of one or more digit characters, and represents a + numeric value. + +4.3. String + + A string is in one of two forms: literal and quoted string. The + literal form is the general form of string. The quoted string form + is an alternative that avoids the overhead of processing a literal at + the cost of limitations of characters that can be used in a quoted + string. + + A literal is a sequence of zero or more octets (including CR and LF), + prefix-quoted with an octet count in the form of an open brace ("{"), + the number of octets, close brace ("}"), and CRLF. In the case of + literals transmitted from server to client, the CRLF is immediately + followed by the octet data. In the case of literals transmitted from + client to server, the client MUST wait to receive a command + continuation request (described later in this document) before + sending the octet data (and the remainder of the command). + + A quoted string is a sequence of zero or more 7-bit characters, + excluding CR and LF, with double quote (<">) characters at each end. + + The empty string is represented as either "" (a quoted string with + zero characters between double quotes) or as {0} followed by CRLF (a + literal with an octet count of 0). + + Note: Even if the octet count is 0, a client transmitting a + literal MUST wait to receive a command continuation request. + +4.3.1. 8-bit and Binary Strings + + 8-bit textual and binary mail is supported through the use of a + [MIME-IMB] content transfer encoding. IMAP4rev1 implementations MAY + transmit 8-bit or multi-octet characters in literals, but SHOULD do + so only when the [CHARSET] is identified. 
+ + + + + + + + + +Crispin Standards Track [Page 13] + +RFC 2060 IMAP4rev1 December 1996 + + + Although a BINARY body encoding is defined, unencoded binary strings + are not permitted. A "binary string" is any string with NUL + characters. Implementations MUST encode binary data into a textual + form such as BASE64 before transmitting the data. A string with an + excessive amount of CTL characters MAY also be considered to be + binary. + +4.4. Parenthesized List + + Data structures are represented as a "parenthesized list"; a sequence + of data items, delimited by space, and bounded at each end by + parentheses. A parenthesized list can contain other parenthesized + lists, using multiple levels of parentheses to indicate nesting. + + The empty list is represented as () -- a parenthesized list with no + members. + +4.5. NIL + + The special atom "NIL" represents the non-existence of a particular + data item that is represented as a string or parenthesized list, as + distinct from the empty string "" or the empty parenthesized list (). + +5. Operational Considerations + +5.1. Mailbox Naming + + The interpretation of mailbox names is implementation-dependent. + However, the case-insensitive mailbox name INBOX is a special name + reserved to mean "the primary mailbox for this user on this server". + +5.1.1. Mailbox Hierarchy Naming + + If it is desired to export hierarchical mailbox names, mailbox names + MUST be left-to-right hierarchical using a single character to + separate levels of hierarchy. The same hierarchy separator character + is used for all levels of hierarchy within a single name. + +5.1.2. Mailbox Namespace Naming Convention + + By convention, the first hierarchical element of any mailbox name + which begins with "#" identifies the "namespace" of the remainder of + the name. This makes it possible to disambiguate between different + types of mailbox stores, each of which have their own namespaces. 
+ + + + + + +Crispin Standards Track [Page 14] + +RFC 2060 IMAP4rev1 December 1996 + + + For example, implementations which offer access to USENET + newsgroups MAY use the "#news" namespace to partition the USENET + newsgroup namespace from that of other mailboxes. Thus, the + comp.mail.misc newsgroup would have a mailbox name of + "#news.comp.mail.misc", and the name "comp.mail.misc" could refer + to a different object (e.g. a user's private mailbox). + +5.1.3. Mailbox International Naming Convention + + By convention, international mailbox names are specified using a + modified version of the UTF-7 encoding described in [UTF-7]. The + purpose of these modifications is to correct the following problems + with UTF-7: + + 1) UTF-7 uses the "+" character for shifting; this conflicts with + the common use of "+" in mailbox names, in particular USENET + newsgroup names. + + 2) UTF-7's encoding is BASE64 which uses the "/" character; this + conflicts with the use of "/" as a popular hierarchy delimiter. + + 3) UTF-7 prohibits the unencoded usage of "\"; this conflicts with + the use of "\" as a popular hierarchy delimiter. + + 4) UTF-7 prohibits the unencoded usage of "~"; this conflicts with + the use of "~" in some servers as a home directory indicator. + + 5) UTF-7 permits multiple alternate forms to represent the same + string; in particular, printable US-ASCII characters can be + represented in encoded form. + + In modified UTF-7, printable US-ASCII characters except for "&" + represent themselves; that is, characters with octet values 0x20-0x25 + and 0x27-0x7e. The character "&" (0x26) is represented by the two- + octet sequence "&-". + + All other characters (octet values 0x00-0x1f, 0x7f-0xff, and all + Unicode 16-bit octets) are represented in modified BASE64, with a + further modification from [UTF-7] that "," is used instead of "/". + Modified BASE64 MUST NOT be used to represent any printing US-ASCII + character which can represent itself.
+ + "&" is used to shift to modified BASE64 and "-" to shift back to US- + ASCII. All names start in US-ASCII, and MUST end in US-ASCII (that + is, a name that ends with a Unicode 16-bit octet MUST end with a "- + "). + + + + + +Crispin Standards Track [Page 15] + +RFC 2060 IMAP4rev1 December 1996 + + + For example, here is a mailbox name which mixes English, Japanese, + and Chinese text: ~peter/mail/&ZeVnLIqe-/&U,BTFw- + +5.2. Mailbox Size and Message Status Updates + + At any time, a server can send data that the client did not request. + Sometimes, such behavior is REQUIRED. For example, agents other than + the server MAY add messages to the mailbox (e.g. new mail delivery), + change the flags of message in the mailbox (e.g. simultaneous access + to the same mailbox by multiple agents), or even remove messages from + the mailbox. A server MUST send mailbox size updates automatically + if a mailbox size change is observed during the processing of a + command. A server SHOULD send message flag updates automatically, + without requiring the client to request such updates explicitly. + Special rules exist for server notification of a client about the + removal of messages to prevent synchronization errors; see the + description of the EXPUNGE response for more detail. + + Regardless of what implementation decisions a client makes on + remembering data from the server, a client implementation MUST record + mailbox size updates. It MUST NOT assume that any command after + initial mailbox selection will return the size of the mailbox. + +5.3. Response when no Command in Progress + + Server implementations are permitted to send an untagged response + (except for EXPUNGE) while there is no command in progress. Server + implementations that send such responses MUST deal with flow control + considerations. Specifically, they MUST either (1) verify that the + size of the data does not exceed the underlying transport's available + window size, or (2) use non-blocking writes. 
+ +5.4. Autologout Timer + + If a server has an inactivity autologout timer, that timer MUST be of + at least 30 minutes' duration. The receipt of ANY command from the + client during that interval SHOULD suffice to reset the autologout + timer. + + + + + + + + + + + + + +Crispin Standards Track [Page 16] + +RFC 2060 IMAP4rev1 December 1996 + + +5.5. Multiple Commands in Progress + + The client MAY send another command without waiting for the + completion result response of a command, subject to ambiguity rules + (see below) and flow control constraints on the underlying data + stream. Similarly, a server MAY begin processing another command + before processing the current command to completion, subject to + ambiguity rules. However, any command continuation request responses + and command continuations MUST be negotiated before any subsequent + command is initiated. + + The exception is if an ambiguity would result because of a command + that would affect the results of other commands. Clients MUST NOT + send multiple commands without waiting if an ambiguity would result. + If the server detects a possible ambiguity, it MUST execute commands + to completion in the order given by the client. + + The most obvious example of ambiguity is when a command would affect + the results of another command; for example, a FETCH of a message's + flags and a STORE of that same message's flags. + + A non-obvious ambiguity occurs with commands that permit an untagged + EXPUNGE response (commands other than FETCH, STORE, and SEARCH), + since an untagged EXPUNGE response can invalidate sequence numbers in + a subsequent command. This is not a problem for FETCH, STORE, or + SEARCH commands because servers are prohibited from sending EXPUNGE + responses while any of those commands are in progress. Therefore, if + the client sends any command other than FETCH, STORE, or SEARCH, it + MUST wait for a response before sending a command with message + sequence numbers. 
+ + For example, the following non-waiting command sequences are invalid: + + FETCH + NOOP + STORE + STORE + COPY + FETCH + COPY + COPY + CHECK + FETCH + + The following are examples of valid non-waiting command sequences: + + FETCH + STORE + SEARCH + CHECK + STORE + COPY + EXPUNGE + +6. Client Commands + + IMAP4rev1 commands are described in this section. Commands are + organized by the state in which the command is permitted. Commands + which are permitted in multiple states are listed in the minimum + + + +Crispin Standards Track [Page 17] + +RFC 2060 IMAP4rev1 December 1996 + + + permitted state (for example, commands valid in authenticated and + selected state are listed in the authenticated state commands). + + Command arguments, identified by "Arguments:" in the command + descriptions below, are described by function, not by syntax. The + precise syntax of command arguments is described in the Formal Syntax + section. + + Some commands cause specific server responses to be returned; these + are identified by "Responses:" in the command descriptions below. + See the response descriptions in the Responses section for + information on these responses, and the Formal Syntax section for the + precise syntax of these responses. It is possible for server data to + be transmitted as a result of any command; thus, commands that do not + specifically require server data specify "no specific responses for + this command" instead of "none". + + The "Result:" in the command description refers to the possible + tagged status responses to a command, and any special interpretation + of these status responses. + +6.1. Client Commands - Any State + + The following commands are valid in any state: CAPABILITY, NOOP, and + LOGOUT. + +6.1.1. 
CAPABILITY Command + + Arguments: none + + Responses: REQUIRED untagged response: CAPABILITY + + Result: OK - capability completed + BAD - command unknown or arguments invalid + + The CAPABILITY command requests a listing of capabilities that the + server supports. The server MUST send a single untagged + CAPABILITY response with "IMAP4rev1" as one of the listed + capabilities before the (tagged) OK response. This listing of + capabilities is not dependent upon connection state or user. It + is therefore not necessary to issue a CAPABILITY command more than + once in a connection. + + + + + + + + + +Crispin Standards Track [Page 18] + +RFC 2060 IMAP4rev1 December 1996 + + + A capability name which begins with "AUTH=" indicates that the + server supports that particular authentication mechanism. All + such names are, by definition, part of this specification. For + example, the authorization capability for an experimental + "blurdybloop" authenticator would be "AUTH=XBLURDYBLOOP" and not + "XAUTH=BLURDYBLOOP" or "XAUTH=XBLURDYBLOOP". + + Other capability names refer to extensions, revisions, or + amendments to this specification. See the documentation of the + CAPABILITY response for additional information. No capabilities, + beyond the base IMAP4rev1 set defined in this specification, are + enabled without explicit client action to invoke the capability. + + See the section entitled "Client Commands - + Experimental/Expansion" for information about the form of site or + implementation-specific capabilities. + + Example: C: abcd CAPABILITY + S: * CAPABILITY IMAP4rev1 AUTH=KERBEROS_V4 + S: abcd OK CAPABILITY completed + +6.1.2. NOOP Command + + Arguments: none + + Responses: no specific responses for this command (but see below) + + Result: OK - noop completed + BAD - command unknown or arguments invalid + + The NOOP command always succeeds. It does nothing. 
+ + Since any command can return a status update as untagged data, the + NOOP command can be used as a periodic poll for new messages or + message status updates during a period of inactivity. The NOOP + command can also be used to reset any inactivity autologout timer + on the server. + + Example: C: a002 NOOP + S: a002 OK NOOP completed + . . . + C: a047 NOOP + S: * 22 EXPUNGE + S: * 23 EXISTS + S: * 3 RECENT + S: * 14 FETCH (FLAGS (\Seen \Deleted)) + S: a047 OK NOOP completed + + + + +Crispin Standards Track [Page 19] + +RFC 2060 IMAP4rev1 December 1996 + + +6.1.3. LOGOUT Command + + Arguments: none + + Responses: REQUIRED untagged response: BYE + + Result: OK - logout completed + BAD - command unknown or arguments invalid + + The LOGOUT command informs the server that the client is done with + the connection. The server MUST send a BYE untagged response + before the (tagged) OK response, and then close the network + connection. + + Example: C: A023 LOGOUT + S: * BYE IMAP4rev1 Server logging out + S: A023 OK LOGOUT completed + (Server and client then close the connection) + +6.2. Client Commands - Non-Authenticated State + + In non-authenticated state, the AUTHENTICATE or LOGIN command + establishes authentication and enter authenticated state. The + AUTHENTICATE command provides a general mechanism for a variety of + authentication techniques, whereas the LOGIN command uses the + traditional user name and plaintext password pair. + + Server implementations MAY allow non-authenticated access to certain + mailboxes. The convention is to use a LOGIN command with the userid + "anonymous". A password is REQUIRED. It is implementation-dependent + what requirements, if any, are placed on the password and what access + restrictions are placed on anonymous users. + + Once authenticated (including as anonymous), it is not possible to + re-enter non-authenticated state. 
+ + In addition to the universal commands (CAPABILITY, NOOP, and LOGOUT), + the following commands are valid in non-authenticated state: + AUTHENTICATE and LOGIN. + + + + + + + + + + + + +Crispin Standards Track [Page 20] + +RFC 2060 IMAP4rev1 December 1996 + + +6.2.1. AUTHENTICATE Command + + Arguments: authentication mechanism name + + Responses: continuation data can be requested + + Result: OK - authenticate completed, now in authenticated state + NO - authenticate failure: unsupported authentication + mechanism, credentials rejected + BAD - command unknown or arguments invalid, + authentication exchange cancelled + + The AUTHENTICATE command indicates an authentication mechanism, + such as described in [IMAP-AUTH], to the server. If the server + supports the requested authentication mechanism, it performs an + authentication protocol exchange to authenticate and identify the + client. It MAY also negotiate an OPTIONAL protection mechanism + for subsequent protocol interactions. If the requested + authentication mechanism is not supported, the server SHOULD + reject the AUTHENTICATE command by sending a tagged NO response. + + The authentication protocol exchange consists of a series of + server challenges and client answers that are specific to the + authentication mechanism. A server challenge consists of a + command continuation request response with the "+" token followed + by a BASE64 encoded string. The client answer consists of a line + consisting of a BASE64 encoded string. If the client wishes to + cancel an authentication exchange, it issues a line with a single + "*". If the server receives such an answer, it MUST reject the + AUTHENTICATE command by sending a tagged BAD response. + + A protection mechanism provides integrity and privacy protection + to the connection. If a protection mechanism is negotiated, it is + applied to all subsequent data sent over the connection. 
The + protection mechanism takes effect immediately following the CRLF + that concludes the authentication exchange for the client, and the + CRLF of the tagged OK response for the server. Once the + protection mechanism is in effect, the stream of command and + response octets is processed into buffers of ciphertext. Each + buffer is transferred over the connection as a stream of octets + prepended with a four octet field in network byte order that + represents the length of the following data. The maximum + ciphertext buffer length is defined by the protection mechanism. + + Authentication mechanisms are OPTIONAL. Protection mechanisms are + also OPTIONAL; an authentication mechanism MAY be implemented + without any protection mechanism. If an AUTHENTICATE command + fails with a NO response, the client MAY try another + + + +Crispin Standards Track [Page 21] + +RFC 2060 IMAP4rev1 December 1996 + + + authentication mechanism by issuing another AUTHENTICATE command, + or MAY attempt to authenticate by using the LOGIN command. In + other words, the client MAY request authentication types in + decreasing order of preference, with the LOGIN command as a last + resort. + + Example: S: * OK KerberosV4 IMAP4rev1 Server + C: A001 AUTHENTICATE KERBEROS_V4 + S: + AmFYig== + C: BAcAQU5EUkVXLkNNVS5FRFUAOCAsho84kLN3/IJmrMG+25a4DT + +nZImJjnTNHJUtxAA+o0KPKfHEcAFs9a3CL5Oebe/ydHJUwYFd + WwuQ1MWiy6IesKvjL5rL9WjXUb9MwT9bpObYLGOKi1Qh + S: + or//EoAADZI= + C: DiAF5A4gA+oOIALuBkAAmw== + S: A001 OK Kerberos V4 authentication successful + + Note: the line breaks in the first client answer are for editorial + clarity and are not in real authenticators. + +6.2.2. 
LOGIN Command + + Arguments: user name + password + + Responses: no specific responses for this command + + Result: OK - login completed, now in authenticated state + NO - login failure: user name or password rejected + BAD - command unknown or arguments invalid + + The LOGIN command identifies the client to the server and carries + the plaintext password authenticating this user. + + Example: C: a001 LOGIN SMITH SESAME + S: a001 OK LOGIN completed + +6.3. Client Commands - Authenticated State + + In authenticated state, commands that manipulate mailboxes as atomic + entities are permitted. Of these commands, the SELECT and EXAMINE + commands will select a mailbox for access and enter selected state. + + In addition to the universal commands (CAPABILITY, NOOP, and LOGOUT), + the following commands are valid in authenticated state: SELECT, + EXAMINE, CREATE, DELETE, RENAME, SUBSCRIBE, UNSUBSCRIBE, LIST, LSUB, + STATUS, and APPEND. + + + + + +Crispin Standards Track [Page 22] + +RFC 2060 IMAP4rev1 December 1996 + + +6.3.1. SELECT Command + + Arguments: mailbox name + + Responses: REQUIRED untagged responses: FLAGS, EXISTS, RECENT + OPTIONAL OK untagged responses: UNSEEN, PERMANENTFLAGS + + Result: OK - select completed, now in selected state + NO - select failure, now in authenticated state: no + such mailbox, can't access mailbox + BAD - command unknown or arguments invalid + + The SELECT command selects a mailbox so that messages in the + mailbox can be accessed. Before returning an OK to the client, + the server MUST send the following untagged data to the client: + + FLAGS Defined flags in the mailbox. See the description + of the FLAGS response for more detail. + + EXISTS The number of messages in the mailbox. See the + description of the EXISTS response for more detail. + + RECENT The number of messages with the \Recent flag set. + See the description of the RECENT response for more + detail. + + OK [UIDVALIDITY <n>] + The unique identifier validity value.
See the + description of the UID command for more detail. + + to define the initial state of the mailbox at the client. + + The server SHOULD also send an UNSEEN response code in an OK + untagged response, indicating the message sequence number of the + first unseen message in the mailbox. + + If the client can not change the permanent state of one or more of + the flags listed in the FLAGS untagged response, the server SHOULD + send a PERMANENTFLAGS response code in an OK untagged response, + listing the flags that the client can change permanently. + + Only one mailbox can be selected at a time in a connection; + simultaneous access to multiple mailboxes requires multiple + connections. The SELECT command automatically deselects any + currently selected mailbox before attempting the new selection. + Consequently, if a mailbox is selected and a SELECT command that + fails is attempted, no mailbox is selected. + + + + +Crispin Standards Track [Page 23] + +RFC 2060 IMAP4rev1 December 1996 + + + If the client is permitted to modify the mailbox, the server + SHOULD prefix the text of the tagged OK response with the + "[READ-WRITE]" response code. + + If the client is not permitted to modify the mailbox but is + permitted read access, the mailbox is selected as read-only, and + the server MUST prefix the text of the tagged OK response to + SELECT with the "[READ-ONLY]" response code. Read-only access + through SELECT differs from the EXAMINE command in that certain + read-only mailboxes MAY permit the change of permanent state on a + per-user (as opposed to global) basis. Netnews messages marked in + a server-based .newsrc file are an example of such per-user + permanent state that can be modified with read-only mailboxes. 
+ + Example: C: A142 SELECT INBOX + S: * 172 EXISTS + S: * 1 RECENT + S: * OK [UNSEEN 12] Message 12 is first unseen + S: * OK [UIDVALIDITY 3857529045] UIDs valid + S: * FLAGS (\Answered \Flagged \Deleted \Seen \Draft) + S: * OK [PERMANENTFLAGS (\Deleted \Seen \*)] Limited + S: A142 OK [READ-WRITE] SELECT completed + +6.3.2. EXAMINE Command + + Arguments: mailbox name + + Responses: REQUIRED untagged responses: FLAGS, EXISTS, RECENT + OPTIONAL OK untagged responses: UNSEEN, PERMANENTFLAGS + + Result: OK - examine completed, now in selected state + NO - examine failure, now in authenticated state: no + such mailbox, can't access mailbox + BAD - command unknown or arguments invalid + + The EXAMINE command is identical to SELECT and returns the same + output; however, the selected mailbox is identified as read-only. + No changes to the permanent state of the mailbox, including + per-user state, are permitted. + + + + + + + + + + + + +Crispin Standards Track [Page 24] + +RFC 2060 IMAP4rev1 December 1996 + + + The text of the tagged OK response to the EXAMINE command MUST + begin with the "[READ-ONLY]" response code. + + Example: C: A932 EXAMINE blurdybloop + S: * 17 EXISTS + S: * 2 RECENT + S: * OK [UNSEEN 8] Message 8 is first unseen + S: * OK [UIDVALIDITY 3857529045] UIDs valid + S: * FLAGS (\Answered \Flagged \Deleted \Seen \Draft) + S: * OK [PERMANENTFLAGS ()] No permanent flags permitted + S: A932 OK [READ-ONLY] EXAMINE completed + +6.3.3. CREATE Command + + Arguments: mailbox name + + Responses: no specific responses for this command + + Result: OK - create completed + NO - create failure: can't create mailbox with that name + BAD - command unknown or arguments invalid + + The CREATE command creates a mailbox with the given name. An OK + response is returned only if a new mailbox with that name has been + created. It is an error to attempt to create INBOX or a mailbox + with a name that refers to an extant mailbox. 
Any error in + creation will return a tagged NO response. + + If the mailbox name is suffixed with the server's hierarchy + separator character (as returned from the server by a LIST + command), this is a declaration that the client intends to create + mailbox names under this name in the hierarchy. Server + implementations that do not require this declaration MUST ignore + it. + + If the server's hierarchy separator character appears elsewhere in + the name, the server SHOULD create any superior hierarchical names + that are needed for the CREATE command to complete successfully. + In other words, an attempt to create "foo/bar/zap" on a server in + which "/" is the hierarchy separator character SHOULD create foo/ + and foo/bar/ if they do not already exist. + + If a new mailbox is created with the same name as a mailbox which + was deleted, its unique identifiers MUST be greater than any + unique identifiers used in the previous incarnation of the mailbox + UNLESS the new incarnation has a different unique identifier + validity value. See the description of the UID command for more + detail. + + + +Crispin Standards Track [Page 25] + +RFC 2060 IMAP4rev1 December 1996 + + + Example: C: A003 CREATE owatagusiam/ + S: A003 OK CREATE completed + C: A004 CREATE owatagusiam/blurdybloop + S: A004 OK CREATE completed + + Note: the interpretation of this example depends on whether "/" + was returned as the hierarchy separator from LIST. If "/" is the + hierarchy separator, a new level of hierarchy named "owatagusiam" + with a member called "blurdybloop" is created. Otherwise, two + mailboxes at the same hierarchy level are created. + +6.3.4. DELETE Command + + Arguments: mailbox name + + Responses: no specific responses for this command + + Result: OK - delete completed + NO - delete failure: can't delete mailbox with that name + BAD - command unknown or arguments invalid + + The DELETE command permanently removes the mailbox with the given + name. 
A tagged OK response is returned only if the mailbox has + been deleted. It is an error to attempt to delete INBOX or a + mailbox name that does not exist. + + The DELETE command MUST NOT remove inferior hierarchical names. + For example, if a mailbox "foo" has an inferior "foo.bar" + (assuming "." is the hierarchy delimiter character), removing + "foo" MUST NOT remove "foo.bar". It is an error to attempt to + delete a name that has inferior hierarchical names and also has + the \Noselect mailbox name attribute (see the description of the + LIST response for more details). + + It is permitted to delete a name that has inferior hierarchical + names and does not have the \Noselect mailbox name attribute. In + this case, all messages in that mailbox are removed, and the name + will acquire the \Noselect mailbox name attribute. + + The value of the highest-used unique identifier of the deleted + mailbox MUST be preserved so that a new mailbox created with the + same name will not reuse the identifiers of the former + incarnation, UNLESS the new incarnation has a different unique + identifier validity value. See the description of the UID command + for more detail. + + + + + + +Crispin Standards Track [Page 26] + +RFC 2060 IMAP4rev1 December 1996 + + + Examples: C: A682 LIST "" * + S: * LIST () "/" blurdybloop + S: * LIST (\Noselect) "/" foo + S: * LIST () "/" foo/bar + S: A682 OK LIST completed + C: A683 DELETE blurdybloop + S: A683 OK DELETE completed + C: A684 DELETE foo + S: A684 NO Name "foo" has inferior hierarchical names + C: A685 DELETE foo/bar + S: A685 OK DELETE Completed + C: A686 LIST "" * + S: * LIST (\Noselect) "/" foo + S: A686 OK LIST completed + C: A687 DELETE foo + S: A687 OK DELETE Completed + + + C: A82 LIST "" * + S: * LIST () "." blurdybloop + S: * LIST () "." foo + S: * LIST () "." 
foo.bar + S: A82 OK LIST completed + C: A83 DELETE blurdybloop + S: A83 OK DELETE completed + C: A84 DELETE foo + S: A84 OK DELETE Completed + C: A85 LIST "" * + S: * LIST () "." foo.bar + S: A85 OK LIST completed + C: A86 LIST "" % + S: * LIST (\Noselect) "." foo + S: A86 OK LIST completed + +6.3.5. RENAME Command + + Arguments: existing mailbox name + new mailbox name + + Responses: no specific responses for this command + + Result: OK - rename completed + NO - rename failure: can't rename mailbox with that name, + can't rename to mailbox with that name + BAD - command unknown or arguments invalid + + The RENAME command changes the name of a mailbox. A tagged OK + response is returned only if the mailbox has been renamed. It is + + + +Crispin Standards Track [Page 27] + +RFC 2060 IMAP4rev1 December 1996 + + + an error to attempt to rename from a mailbox name that does not + exist or to a mailbox name that already exists. Any error in + renaming will return a tagged NO response. + + If the name has inferior hierarchical names, then the inferior + hierarchical names MUST also be renamed. For example, a rename of + "foo" to "zap" will rename "foo/bar" (assuming "/" is the + hierarchy delimiter character) to "zap/bar". + + The value of the highest-used unique identifier of the old mailbox + name MUST be preserved so that a new mailbox created with the same + name will not reuse the identifiers of the former incarnation, + UNLESS the new incarnation has a different unique identifier + validity value. See the description of the UID command for more + detail. + + Renaming INBOX is permitted, and has special behavior. It moves + all messages in INBOX to a new mailbox with the given name, + leaving INBOX empty. If the server implementation supports + inferior hierarchical names of INBOX, these are unaffected by a + rename of INBOX. 
+ + Examples: C: A682 LIST "" * + S: * LIST () "/" blurdybloop + S: * LIST (\Noselect) "/" foo + S: * LIST () "/" foo/bar + S: A682 OK LIST completed + C: A683 RENAME blurdybloop sarasoop + S: A683 OK RENAME completed + C: A684 RENAME foo zowie + S: A684 OK RENAME Completed + C: A685 LIST "" * + S: * LIST () "/" sarasoop + S: * LIST (\Noselect) "/" zowie + S: * LIST () "/" zowie/bar + S: A685 OK LIST completed + + + + + + + + + + + + + + + +Crispin Standards Track [Page 28] + +RFC 2060 IMAP4rev1 December 1996 + + + C: Z432 LIST "" * + S: * LIST () "." INBOX + S: * LIST () "." INBOX.bar + S: Z432 OK LIST completed + C: Z433 RENAME INBOX old-mail + S: Z433 OK RENAME completed + C: Z434 LIST "" * + S: * LIST () "." INBOX + S: * LIST () "." INBOX.bar + S: * LIST () "." old-mail + S: Z434 OK LIST completed + +6.3.6. SUBSCRIBE Command + + Arguments: mailbox + + Responses: no specific responses for this command + + Result: OK - subscribe completed + NO - subscribe failure: can't subscribe to that name + BAD - command unknown or arguments invalid + + The SUBSCRIBE command adds the specified mailbox name to the + server's set of "active" or "subscribed" mailboxes as returned by + the LSUB command. This command returns a tagged OK response only + if the subscription is successful. + + A server MAY validate the mailbox argument to SUBSCRIBE to verify + that it exists. However, it MUST NOT unilaterally remove an + existing mailbox name from the subscription list even if a mailbox + by that name no longer exists. + + Note: this requirement is because some server sites may routinely + remove a mailbox with a well-known name (e.g. "system-alerts") + after its contents expire, with the intention of recreating it + when new contents are appropriate. + + Example: C: A002 SUBSCRIBE #news.comp.mail.mime + S: A002 OK SUBSCRIBE completed + + + + + + + + + + + + +Crispin Standards Track [Page 29] + +RFC 2060 IMAP4rev1 December 1996 + + +6.3.7. 
UNSUBSCRIBE Command + + Arguments: mailbox name + + Responses: no specific responses for this command + + Result: OK - unsubscribe completed + NO - unsubscribe failure: can't unsubscribe that name + BAD - command unknown or arguments invalid + + The UNSUBSCRIBE command removes the specified mailbox name from + the server's set of "active" or "subscribed" mailboxes as returned + by the LSUB command. This command returns a tagged OK response + only if the unsubscription is successful. + + Example: C: A002 UNSUBSCRIBE #news.comp.mail.mime + S: A002 OK UNSUBSCRIBE completed + +6.3.8. LIST Command + + Arguments: reference name + mailbox name with possible wildcards + + Responses: untagged responses: LIST + + Result: OK - list completed + NO - list failure: can't list that reference or name + BAD - command unknown or arguments invalid + + The LIST command returns a subset of names from the complete set + of all names available to the client. Zero or more untagged LIST + replies are returned, containing the name attributes, hierarchy + delimiter, and name; see the description of the LIST reply for + more detail. + + The LIST command SHOULD return its data quickly, without undue + delay. For example, it SHOULD NOT go to excess trouble to + calculate \Marked or \Unmarked status or perform other processing; + if each name requires 1 second of processing, then a list of 1200 + names would take 20 minutes! + + An empty ("" string) reference name argument indicates that the + mailbox name is interpreted as by SELECT. The returned mailbox + names MUST match the supplied mailbox name pattern. A non-empty + reference name argument is the name of a mailbox or a level of + mailbox hierarchy, and indicates a context in which the mailbox + name is interpreted in an implementation-defined manner. 
+ + + + +Crispin Standards Track [Page 30] + +RFC 2060 IMAP4rev1 December 1996 + + + An empty ("" string) mailbox name argument is a special request to + return the hierarchy delimiter and the root name of the name given + in the reference. The value returned as the root MAY be null if + the reference is non-rooted or is null. In all cases, the + hierarchy delimiter is returned. This permits a client to get the + hierarchy delimiter even when no mailboxes by that name currently + exist. + + The reference and mailbox name arguments are interpreted, in an + implementation-dependent fashion, into a canonical form that + represents an unambiguous left-to-right hierarchy. The returned + mailbox names will be in the interpreted form. + + Any part of the reference argument that is included in the + interpreted form SHOULD prefix the interpreted form. It SHOULD + also be in the same form as the reference name argument. This + rule permits the client to determine if the returned mailbox name + is in the context of the reference argument, or if something about + the mailbox argument overrode the reference argument. Without + this rule, the client would have to have knowledge of the server's + naming semantics including what characters are "breakouts" that + override a naming context. + + For example, here are some examples of how references and mailbox + names might be interpreted on a UNIX-based server: + + Reference Mailbox Name Interpretation + ------------ ------------ -------------- + ~smith/Mail/ foo.* ~smith/Mail/foo.* + archive/ % archive/% + #news. comp.mail.* #news.comp.mail.* + ~smith/Mail/ /usr/doc/foo /usr/doc/foo + archive/ ~fred/Mail/* ~fred/Mail/* + + The first three examples demonstrate interpretations in the + context of the reference argument. Note that "~smith/Mail" SHOULD + NOT be transformed into something like "/u2/users/smith/Mail", or + it would be impossible for the client to determine that the + interpretation was in the context of the reference. 
+ + The character "*" is a wildcard, and matches zero or more + characters at this position. The character "%" is similar to "*", + but it does not match a hierarchy delimiter. If the "%" wildcard + is the last character of a mailbox name argument, matching levels + of hierarchy are also returned. If these levels of hierarchy are + not also selectable mailboxes, they are returned with the + \Noselect mailbox name attribute (see the description of the LIST + response for more details). + + + +Crispin Standards Track [Page 31] + +RFC 2060 IMAP4rev1 December 1996 + + + Server implementations are permitted to "hide" otherwise + accessible mailboxes from the wildcard characters, by preventing + certain characters or names from matching a wildcard in certain + situations. For example, a UNIX-based server might restrict the + interpretation of "*" so that an initial "/" character does not + match. + + The special name INBOX is included in the output from LIST, if + INBOX is supported by this server for this user and if the + uppercase string "INBOX" matches the interpreted reference and + mailbox name arguments with wildcards as described above. The + criteria for omitting INBOX is whether SELECT INBOX will return + failure; it is not relevant whether the user's real INBOX resides + on this or some other server. + + Example: C: A101 LIST "" "" + S: * LIST (\Noselect) "/" "" + S: A101 OK LIST Completed + C: A102 LIST #news.comp.mail.misc "" + S: * LIST (\Noselect) "." #news. + S: A102 OK LIST Completed + C: A103 LIST /usr/staff/jones "" + S: * LIST (\Noselect) "/" / + S: A103 OK LIST Completed + C: A202 LIST ~/Mail/ % + S: * LIST (\Noselect) "/" ~/Mail/foo + S: * LIST () "/" ~/Mail/meetings + S: A202 OK LIST completed + +6.3.9. 
LSUB Command + + Arguments: reference name + mailbox name with possible wildcards + + Responses: untagged responses: LSUB + + Result: OK - lsub completed + NO - lsub failure: can't list that reference or name + BAD - command unknown or arguments invalid + + The LSUB command returns a subset of names from the set of names + that the user has declared as being "active" or "subscribed". + Zero or more untagged LSUB replies are returned. The arguments to + LSUB are in the same form as those for LIST. + + A server MAY validate the subscribed names to see if they still + exist. If a name does not exist, it SHOULD be flagged with the + \Noselect attribute in the LSUB response. The server MUST NOT + + + +Crispin Standards Track [Page 32] + +RFC 2060 IMAP4rev1 December 1996 + + + unilaterally remove an existing mailbox name from the subscription + list even if a mailbox by that name no longer exists. + + Example: C: A002 LSUB "#news." "comp.mail.*" + S: * LSUB () "." #news.comp.mail.mime + S: * LSUB () "." #news.comp.mail.misc + S: A002 OK LSUB completed + +6.3.10. STATUS Command + + Arguments: mailbox name + status data item names + + Responses: untagged responses: STATUS + + Result: OK - status completed + NO - status failure: no status for that name + BAD - command unknown or arguments invalid + + The STATUS command requests the status of the indicated mailbox. + It does not change the currently selected mailbox, nor does it + affect the state of any messages in the queried mailbox (in + particular, STATUS MUST NOT cause messages to lose the \Recent + flag). + + The STATUS command provides an alternative to opening a second + IMAP4rev1 connection and doing an EXAMINE command on a mailbox to + query that mailbox's status without deselecting the current + mailbox in the first IMAP4rev1 connection. + + Unlike the LIST command, the STATUS command is not guaranteed to + be fast in its response. 
In some implementations, the server is + obliged to open the mailbox read-only internally to obtain certain + status information. Also unlike the LIST command, the STATUS + command does not accept wildcards. + + The currently defined status data items that can be requested are: + + MESSAGES The number of messages in the mailbox. + + RECENT The number of messages with the \Recent flag set. + + UIDNEXT The next UID value that will be assigned to a new + message in the mailbox. It is guaranteed that this + value will not change unless new messages are added + to the mailbox; and that it will change when new + messages are added even if those new messages are + subsequently expunged. + + + +Crispin Standards Track [Page 33] + +RFC 2060 IMAP4rev1 December 1996 + + + UIDVALIDITY The unique identifier validity value of the + mailbox. + + UNSEEN The number of messages which do not have the \Seen + flag set. + + + Example: C: A042 STATUS blurdybloop (UIDNEXT MESSAGES) + S: * STATUS blurdybloop (MESSAGES 231 UIDNEXT 44292) + S: A042 OK STATUS completed + +6.3.11. APPEND Command + + Arguments: mailbox name + OPTIONAL flag parenthesized list + OPTIONAL date/time string + message literal + + Responses: no specific responses for this command + + Result: OK - append completed + NO - append error: can't append to that mailbox, error + in flags or date/time or message text + BAD - command unknown or arguments invalid + + The APPEND command appends the literal argument as a new message + to the end of the specified destination mailbox. This argument + SHOULD be in the format of an [RFC-822] message. 8-bit characters + are permitted in the message. A server implementation that is + unable to preserve 8-bit data properly MUST be able to reversibly + convert 8-bit APPEND data to 7-bit using a [MIME-IMB] content + transfer encoding. + + Note: There MAY be exceptions, e.g. draft messages, in which + required [RFC-822] header lines are omitted in the message literal + argument to APPEND. 
The full implications of doing so MUST be + understood and carefully weighed. + + If a flag parenthesized list is specified, the flags SHOULD be set in + the resulting message; otherwise, the flag list of the resulting + message is set empty by default. + + If a date_time is specified, the internal date SHOULD be set in the + resulting message; otherwise, the internal date of the resulting + message is set to the current date and time by default. + + + + + + +Crispin Standards Track [Page 34] + +RFC 2060 IMAP4rev1 December 1996 + + + If the append is unsuccessful for any reason, the mailbox MUST be + restored to its state before the APPEND attempt; no partial appending + is permitted. + + If the destination mailbox does not exist, a server MUST return an + error, and MUST NOT automatically create the mailbox. Unless it is + certain that the destination mailbox can not be created, the server + MUST send the response code "[TRYCREATE]" as the prefix of the text + of the tagged NO response. This gives a hint to the client that it + can attempt a CREATE command and retry the APPEND if the CREATE is + successful. + + If the mailbox is currently selected, the normal new mail actions + SHOULD occur. Specifically, the server SHOULD notify the client + immediately via an untagged EXISTS response. If the server does not + do so, the client MAY issue a NOOP command (or failing that, a CHECK + command) after one or more APPEND commands. + + Example: C: A003 APPEND saved-messages (\Seen) {310} + C: Date: Mon, 7 Feb 1994 21:52:25 -0800 (PST) + C: From: Fred Foobar <foobar@Blurdybloop.COM> + C: Subject: afternoon meeting + C: To: mooch@owatagu.siam.edu + C: Message-Id: <B27397-0100000@Blurdybloop.COM> + C: MIME-Version: 1.0 + C: Content-Type: TEXT/PLAIN; CHARSET=US-ASCII + C: + C: Hello Joe, do you think we can meet at 3:30 tomorrow? + C: + S: A003 OK APPEND completed + + Note: the APPEND command is not used for message delivery, because + it does not provide a mechanism to transfer [SMTP] envelope + information. + +6.4. 
Client Commands - Selected State + + In selected state, commands that manipulate messages in a mailbox are + permitted. + + In addition to the universal commands (CAPABILITY, NOOP, and LOGOUT), + and the authenticated state commands (SELECT, EXAMINE, CREATE, + DELETE, RENAME, SUBSCRIBE, UNSUBSCRIBE, LIST, LSUB, STATUS, and + APPEND), the following commands are valid in the selected state: + CHECK, CLOSE, EXPUNGE, SEARCH, FETCH, STORE, COPY, and UID. + + + + + + +Crispin Standards Track [Page 35] + +RFC 2060 IMAP4rev1 December 1996 + + +6.4.1. CHECK Command + + Arguments: none + + Responses: no specific responses for this command + + Result: OK - check completed + BAD - command unknown or arguments invalid + + The CHECK command requests a checkpoint of the currently selected + mailbox. A checkpoint refers to any implementation-dependent + housekeeping associated with the mailbox (e.g. resolving the + server's in-memory state of the mailbox with the state on its + disk) that is not normally executed as part of each command. A + checkpoint MAY take a non-instantaneous amount of real time to + complete. If a server implementation has no such housekeeping + considerations, CHECK is equivalent to NOOP. + + There is no guarantee that an EXISTS untagged response will happen + as a result of CHECK. NOOP, not CHECK, SHOULD be used for new + mail polling. + + Example: C: FXXZ CHECK + S: FXXZ OK CHECK Completed + +6.4.2. CLOSE Command + + Arguments: none + + Responses: no specific responses for this command + + Result: OK - close completed, now in authenticated state + NO - close failure: no mailbox selected + BAD - command unknown or arguments invalid + + The CLOSE command permanently removes from the currently selected + mailbox all messages that have the \Deleted flag set, and returns + to authenticated state from selected state. No untagged EXPUNGE + responses are sent. 
+ + No messages are removed, and no error is given, if the mailbox is + selected by an EXAMINE command or is otherwise selected read-only. + + Even if a mailbox is selected, a SELECT, EXAMINE, or LOGOUT + command MAY be issued without previously issuing a CLOSE command. + The SELECT, EXAMINE, and LOGOUT commands implicitly close the + currently selected mailbox without doing an expunge. However, + when many messages are deleted, a CLOSE-LOGOUT or CLOSE-SELECT + + + +Crispin Standards Track [Page 36] + +RFC 2060 IMAP4rev1 December 1996 + + + sequence is considerably faster than an EXPUNGE-LOGOUT or + EXPUNGE-SELECT because no untagged EXPUNGE responses (which the + client would probably ignore) are sent. + + Example: C: A341 CLOSE + S: A341 OK CLOSE completed + +6.4.3. EXPUNGE Command + + Arguments: none + + Responses: untagged responses: EXPUNGE + + Result: OK - expunge completed + NO - expunge failure: can't expunge (e.g. permission + denied) + BAD - command unknown or arguments invalid + + The EXPUNGE command permanently removes from the currently + selected mailbox all messages that have the \Deleted flag set. + Before returning an OK to the client, an untagged EXPUNGE response + is sent for each message that is removed. + + Example: C: A202 EXPUNGE + S: * 3 EXPUNGE + S: * 3 EXPUNGE + S: * 5 EXPUNGE + S: * 8 EXPUNGE + S: A202 OK EXPUNGE completed + + Note: in this example, messages 3, 4, 7, and 11 had the + \Deleted flag set. See the description of the EXPUNGE + response for further explanation. + +6.4.4. 
SEARCH Command + + Arguments: OPTIONAL [CHARSET] specification + searching criteria (one or more) + + Responses: REQUIRED untagged response: SEARCH + + Result: OK - search completed + NO - search error: can't search that [CHARSET] or + criteria + BAD - command unknown or arguments invalid + + + + + + +Crispin Standards Track [Page 37] + +RFC 2060 IMAP4rev1 December 1996 + + + The SEARCH command searches the mailbox for messages that match + the given searching criteria. Searching criteria consist of one + or more search keys. The untagged SEARCH response from the server + contains a listing of message sequence numbers corresponding to + those messages that match the searching criteria. + + When multiple keys are specified, the result is the intersection + (AND function) of all the messages that match those keys. For + example, the criteria DELETED FROM "SMITH" SINCE 1-Feb-1994 refers + to all deleted messages from Smith that were placed in the mailbox + since February 1, 1994. A search key can also be a parenthesized + list of one or more search keys (e.g. for use with the OR and NOT + keys). + + Server implementations MAY exclude [MIME-IMB] body parts with + terminal content media types other than TEXT and MESSAGE from + consideration in SEARCH matching. + + The OPTIONAL [CHARSET] specification consists of the word + "CHARSET" followed by a registered [CHARSET]. It indicates the + [CHARSET] of the strings that appear in the search criteria. + [MIME-IMB] content transfer encodings, and [MIME-HDRS] strings in + [RFC-822]/[MIME-IMB] headers, MUST be decoded before comparing + text in a [CHARSET] other than US-ASCII. US-ASCII MUST be + supported; other [CHARSET]s MAY be supported. If the server does + not support the specified [CHARSET], it MUST return a tagged NO + response (not a BAD). + + In all search keys that use strings, a message matches the key if + the string is a substring of the field. The matching is case- + insensitive. 
+ + The defined search keys are as follows. Refer to the Formal + Syntax section for the precise syntactic definitions of the + arguments. + + <message set> Messages with message sequence numbers + corresponding to the specified message sequence + number set + + ALL All messages in the mailbox; the default initial + key for ANDing. + + ANSWERED Messages with the \Answered flag set. + + BCC <string> Messages that contain the specified string in the + envelope structure's BCC field. + + + + +Crispin Standards Track [Page 38] + +RFC 2060 IMAP4rev1 December 1996 + + + BEFORE <date> Messages whose internal date is earlier than the + specified date. + + BODY <string> Messages that contain the specified string in the + body of the message. + + CC <string> Messages that contain the specified string in the + envelope structure's CC field. + + DELETED Messages with the \Deleted flag set. + + DRAFT Messages with the \Draft flag set. + + FLAGGED Messages with the \Flagged flag set. + + FROM <string> Messages that contain the specified string in the + envelope structure's FROM field. + + HEADER <field-name> <string> + Messages that have a header with the specified + field-name (as defined in [RFC-822]) and that + contains the specified string in the [RFC-822] + field-body. + + KEYWORD <flag> Messages with the specified keyword set. + + LARGER <n> Messages with an [RFC-822] size larger than the + specified number of octets. + + NEW Messages that have the \Recent flag set but not the + \Seen flag. This is functionally equivalent to + "(RECENT UNSEEN)". + + NOT <search-key> + Messages that do not match the specified search + key. + + OLD Messages that do not have the \Recent flag set. + This is functionally equivalent to "NOT RECENT" (as + opposed to "NOT NEW"). + + ON <date> Messages whose internal date is within the + specified date. + + OR <search-key1> <search-key2> + Messages that match either search key. + + RECENT Messages that have the \Recent flag set. + + + +Crispin Standards Track [Page 39] + +RFC 2060 IMAP4rev1 December 1996 + + + SEEN Messages that have the \Seen flag set. 
+ + SENTBEFORE <date> + Messages whose [RFC-822] Date: header is earlier + than the specified date. + + SENTON <date> Messages whose [RFC-822] Date: header is within the + specified date. + + SENTSINCE <date> + Messages whose [RFC-822] Date: header is within or + later than the specified date. + + SINCE <date> Messages whose internal date is within or later + than the specified date. + + SMALLER <n> Messages with an [RFC-822] size smaller than the + specified number of octets. + + SUBJECT <string> + Messages that contain the specified string in the + envelope structure's SUBJECT field. + + TEXT <string> Messages that contain the specified string in the + header or body of the message. + + TO <string> Messages that contain the specified string in the + envelope structure's TO field. + + UID <message set> + Messages with unique identifiers corresponding to + the specified unique identifier set. + + UNANSWERED Messages that do not have the \Answered flag set. + + UNDELETED Messages that do not have the \Deleted flag set. + + UNDRAFT Messages that do not have the \Draft flag set. + + UNFLAGGED Messages that do not have the \Flagged flag set. + + UNKEYWORD <flag> + Messages that do not have the specified keyword + set. + + UNSEEN Messages that do not have the \Seen flag set. + + + + + +Crispin Standards Track [Page 40] + +RFC 2060 IMAP4rev1 December 1996 + + + Example: C: A282 SEARCH FLAGGED SINCE 1-Feb-1994 NOT FROM "Smith" + S: * SEARCH 2 84 882 + S: A282 OK SEARCH completed + +6.4.5. FETCH Command + + Arguments: message set + message data item names + + Responses: untagged responses: FETCH + + Result: OK - fetch completed + NO - fetch error: can't fetch that data + BAD - command unknown or arguments invalid + + The FETCH command retrieves data associated with a message in the + mailbox. The data items to be fetched can be either a single atom + or a parenthesized list. 
+ + The currently defined data items that can be fetched are: + + ALL Macro equivalent to: (FLAGS INTERNALDATE + RFC822.SIZE ENVELOPE) + + BODY Non-extensible form of BODYSTRUCTURE. + + BODY[
<section>]<<partial>> + The text of a particular body section. The section + specification is a set of zero or more part + specifiers delimited by periods. A part specifier + is either a part number or one of the following: + HEADER, HEADER.FIELDS, HEADER.FIELDS.NOT, MIME, and + TEXT. An empty section specification refers to the + entire message, including the header. + + Every message has at least one part number. + Non-[MIME-IMB] messages, and non-multipart + [MIME-IMB] messages with no encapsulated message, + only have a part 1. + + Multipart messages are assigned consecutive part + numbers, as they occur in the message. If a + particular part is of type message or multipart, + its parts MUST be indicated by a period followed by + the part number within that nested multipart part. + + + + + + +Crispin Standards Track [Page 41] + +RFC 2060 IMAP4rev1 December 1996 + + + A part of type MESSAGE/RFC822 also has nested part + numbers, referring to parts of the MESSAGE part's + body. + + The HEADER, HEADER.FIELDS, HEADER.FIELDS.NOT, and + TEXT part specifiers can be the sole part specifier + or can be prefixed by one or more numeric part + specifiers, provided that the numeric part + specifier refers to a part of type MESSAGE/RFC822. + The MIME part specifier MUST be prefixed by one or + more numeric part specifiers. + + The HEADER, HEADER.FIELDS, and HEADER.FIELDS.NOT + part specifiers refer to the [RFC-822] header of + the message or of an encapsulated [MIME-IMT] + MESSAGE/RFC822 message. HEADER.FIELDS and + HEADER.FIELDS.NOT are followed by a list of + field-name (as defined in [RFC-822]) names, and + return a subset of the header. The subset returned + by HEADER.FIELDS contains only those header fields + with a field-name that matches one of the names in + the list; similarly, the subset returned by + HEADER.FIELDS.NOT contains only the header fields + with a non-matching field-name. The field-matching + is case-insensitive but otherwise exact.
In all + cases, the delimiting blank line between the header + and the body is always included. + + The MIME part specifier refers to the [MIME-IMB] + header for this part. + + The TEXT part specifier refers to the text body of + the message, omitting the [RFC-822] header. + + + + + + + + + + + + + + + + + + +Crispin Standards Track [Page 42] + +RFC 2060 IMAP4rev1 December 1996 + + + Here is an example of a complex message + with some of its part specifiers: + + HEADER ([RFC-822] header of the message) + TEXT MULTIPART/MIXED + 1 TEXT/PLAIN + 2 APPLICATION/OCTET-STREAM + 3 MESSAGE/RFC822 + 3.HEADER ([RFC-822] header of the message) + 3.TEXT ([RFC-822] text body of the message) + 3.1 TEXT/PLAIN + 3.2 APPLICATION/OCTET-STREAM + 4 MULTIPART/MIXED + 4.1 IMAGE/GIF + 4.1.MIME ([MIME-IMB] header for the IMAGE/GIF) + 4.2 MESSAGE/RFC822 + 4.2.HEADER ([RFC-822] header of the message) + 4.2.TEXT ([RFC-822] text body of the message) + 4.2.1 TEXT/PLAIN + 4.2.2 MULTIPART/ALTERNATIVE + 4.2.2.1 TEXT/PLAIN + 4.2.2.2 TEXT/RICHTEXT + + + It is possible to fetch a substring of the + designated text. This is done by appending an open + angle bracket ("<"), the octet position of the + first desired octet, a period, the maximum number + of octets desired, and a close angle bracket (">") + to the part specifier. If the starting octet is + beyond the end of the text, an empty string is + returned. + + Any partial fetch that attempts to read beyond the + end of the text is truncated as appropriate. A + partial fetch that starts at octet 0 is returned as + a partial fetch, even if this truncation happened. + + Note: this means that BODY[]<0.2048> of a + 1500-octet message will return BODY[]<0> + with a literal of size 1500, not BODY[]. + + Note: a substring fetch of a + HEADER.FIELDS or HEADER.FIELDS.NOT part + specifier is calculated after subsetting + the header. 
+ + + + + +Crispin Standards Track [Page 43] + +RFC 2060 IMAP4rev1 December 1996 + + + The \Seen flag is implicitly set; if this causes + the flags to change they SHOULD be included as part + of the FETCH responses. + + BODY.PEEK[
<section>]<<partial>> + An alternate form of BODY[<section>
] that does not + implicitly set the \Seen flag. + + BODYSTRUCTURE The [MIME-IMB] body structure of the message. This + is computed by the server by parsing the [MIME-IMB] + header fields in the [RFC-822] header and + [MIME-IMB] headers. + + ENVELOPE The envelope structure of the message. This is + computed by the server by parsing the [RFC-822] + header into the component parts, defaulting various + fields as necessary. + + FAST Macro equivalent to: (FLAGS INTERNALDATE + RFC822.SIZE) + + FLAGS The flags that are set for this message. + + FULL Macro equivalent to: (FLAGS INTERNALDATE + RFC822.SIZE ENVELOPE BODY) + + INTERNALDATE The internal date of the message. + + RFC822 Functionally equivalent to BODY[], differing in the + syntax of the resulting untagged FETCH data (RFC822 + is returned). + + RFC822.HEADER Functionally equivalent to BODY.PEEK[HEADER], + differing in the syntax of the resulting untagged + FETCH data (RFC822.HEADER is returned). + + RFC822.SIZE The [RFC-822] size of the message. + + RFC822.TEXT Functionally equivalent to BODY[TEXT], differing in + the syntax of the resulting untagged FETCH data + (RFC822.TEXT is returned). + + UID The unique identifier for the message. + + + + + + + + +Crispin Standards Track [Page 44] + +RFC 2060 IMAP4rev1 December 1996 + + + Example: C: A654 FETCH 2:4 (FLAGS BODY[HEADER.FIELDS (DATE FROM)]) + S: * 2 FETCH .... + S: * 3 FETCH .... + S: * 4 FETCH .... + S: A654 OK FETCH completed + +6.4.6. STORE Command + + Arguments: message set + message data item name + value for message data item + + Responses: untagged responses: FETCH + + Result: OK - store completed + NO - store error: can't store that data + BAD - command unknown or arguments invalid + + The STORE command alters data associated with a message in the + mailbox. Normally, STORE will return the updated value of the + data with an untagged FETCH response. 
A suffix of ".SILENT" in + the data item name prevents the untagged FETCH, and the server + SHOULD assume that the client has determined the updated value + itself or does not care about the updated value. + + Note: regardless of whether or not the ".SILENT" suffix was + used, the server SHOULD send an untagged FETCH response if a + change to a message's flags from an external source is + observed. The intent is that the status of the flags is + determinate without a race condition. + + The currently defined data items that can be stored are: + + FLAGS <flag list> + Replace the flags for the message with the + argument. The new value of the flags are returned + as if a FETCH of those flags was done. + + FLAGS.SILENT <flag list> + Equivalent to FLAGS, but without returning a new + value. + + +FLAGS <flag list> + Add the argument to the flags for the message. The + new value of the flags are returned as if a FETCH + of those flags was done. + + + + + +Crispin Standards Track [Page 45] + +RFC 2060 IMAP4rev1 December 1996 + + + +FLAGS.SILENT <flag list> + Equivalent to +FLAGS, but without returning a new + value. + + -FLAGS <flag list> + Remove the argument from the flags for the message. + The new value of the flags are returned as if a + FETCH of those flags was done. + + -FLAGS.SILENT <flag list> + Equivalent to -FLAGS, but without returning a new + value. + + Example: C: A003 STORE 2:4 +FLAGS (\Deleted) + S: * 2 FETCH (FLAGS (\Deleted \Seen)) + S: * 3 FETCH (FLAGS (\Deleted)) + S: * 4 FETCH (FLAGS (\Deleted \Flagged \Seen)) + S: A003 OK STORE completed + +6.4.7. COPY Command + + Arguments: message set + mailbox name + + Responses: no specific responses for this command + + Result: OK - copy completed + NO - copy error: can't copy those messages or to that + name + BAD - command unknown or arguments invalid + + The COPY command copies the specified message(s) to the end of the + specified destination mailbox. The flags and internal date of the + message(s) SHOULD be preserved in the copy.
+ + If the destination mailbox does not exist, a server SHOULD return + an error. It SHOULD NOT automatically create the mailbox. Unless + it is certain that the destination mailbox can not be created, the + server MUST send the response code "[TRYCREATE]" as the prefix of + the text of the tagged NO response. This gives a hint to the + client that it can attempt a CREATE command and retry the COPY if + the CREATE is successful. + + + + + + + + + +Crispin Standards Track [Page 46] + +RFC 2060 IMAP4rev1 December 1996 + + + If the COPY command is unsuccessful for any reason, server + implementations MUST restore the destination mailbox to its state + before the COPY attempt. + + Example: C: A003 COPY 2:4 MEETING + S: A003 OK COPY completed + +6.4.8. UID Command + + Arguments: command name + command arguments + + Responses: untagged responses: FETCH, SEARCH + + Result: OK - UID command completed + NO - UID command error + BAD - command unknown or arguments invalid + + The UID command has two forms. In the first form, it takes as its + arguments a COPY, FETCH, or STORE command with arguments + appropriate for the associated command. However, the numbers in + the message set argument are unique identifiers instead of message + sequence numbers. + + In the second form, the UID command takes a SEARCH command with + SEARCH command arguments. The interpretation of the arguments is + the same as with SEARCH; however, the numbers returned in a SEARCH + response for a UID SEARCH command are unique identifiers instead + of message sequence numbers. For example, the command UID SEARCH + 1:100 UID 443:557 returns the unique identifiers corresponding to + the intersection of the message sequence number set 1:100 and the + UID set 443:557. + + Message set ranges are permitted; however, there is no guarantee + that unique identifiers be contiguous. A non-existent unique + identifier within a message set range is ignored without any error + message generated. 
+ + The number after the "*" in an untagged FETCH response is always a + message sequence number, not a unique identifier, even for a UID + command response. However, server implementations MUST implicitly + include the UID message data item as part of any FETCH response + caused by a UID command, regardless of whether a UID was specified + as a message data item to the FETCH. + + + + + + + +Crispin Standards Track [Page 47] + +RFC 2060 IMAP4rev1 December 1996 + + + Example: C: A999 UID FETCH 4827313:4828442 FLAGS + S: * 23 FETCH (FLAGS (\Seen) UID 4827313) + S: * 24 FETCH (FLAGS (\Seen) UID 4827943) + S: * 25 FETCH (FLAGS (\Seen) UID 4828442) + S: A999 OK UID FETCH completed + +6.5. Client Commands - Experimental/Expansion + +6.5.1. X<atom> Command + + Arguments: implementation defined + + Responses: implementation defined + + Result: OK - command completed + NO - failure + BAD - command unknown or arguments invalid + + Any command prefixed with an X is an experimental command. + Commands which are not part of this specification, a standard or + standards-track revision of this specification, or an IESG- + approved experimental protocol, MUST use the X prefix. + + Any added untagged responses issued by an experimental command + MUST also be prefixed with an X. Server implementations MUST NOT + send any such untagged responses, unless the client requested it + by issuing the associated experimental command. + + Example: C: a441 CAPABILITY + S: * CAPABILITY IMAP4rev1 AUTH=KERBEROS_V4 XPIG-LATIN + S: a441 OK CAPABILITY completed + C: A442 XPIG-LATIN + S: * XPIG-LATIN ow-nay eaking-spay ig-pay atin-lay + S: A442 OK XPIG-LATIN ompleted-cay + +7. Server Responses + + Server responses are in three forms: status responses, server data, + and command continuation request. The information contained in a + server response, identified by "Contents:" in the response + descriptions below, is described by function, not by syntax.
The + precise syntax of server responses is described in the Formal Syntax + section. + + The client MUST be prepared to accept any response at all times. + + + + + + +Crispin Standards Track [Page 48] + +RFC 2060 IMAP4rev1 December 1996 + + + Status responses can be tagged or untagged. Tagged status responses + indicate the completion result (OK, NO, or BAD status) of a client + command, and have a tag matching the command. + + Some status responses, and all server data, are untagged. An + untagged response is indicated by the token "*" instead of a tag. + Untagged status responses indicate server greeting, or server status + that does not indicate the completion of a command (for example, an + impending system shutdown alert). For historical reasons, untagged + server data responses are also called "unsolicited data", although + strictly speaking only unilateral server data is truly "unsolicited". + + Certain server data MUST be recorded by the client when it is + received; this is noted in the description of that data. Such data + conveys critical information which affects the interpretation of all + subsequent commands and responses (e.g. updates reflecting the + creation or destruction of messages). + + Other server data SHOULD be recorded for later reference; if the + client does not need to record the data, or if recording the data has + no obvious purpose (e.g. a SEARCH response when no SEARCH command is + in progress), the data SHOULD be ignored. + + An example of unilateral untagged server data occurs when the IMAP + connection is in selected state. In selected state, the server + checks the mailbox for new messages as part of command execution. + Normally, this is part of the execution of every command; hence, a + NOOP command suffices to check for new messages. If new messages are + found, the server sends untagged EXISTS and RECENT responses + reflecting the new size of the mailbox. 
Server implementations that + offer multiple simultaneous access to the same mailbox SHOULD also + send appropriate unilateral untagged FETCH and EXPUNGE responses if + another agent changes the state of any message flags or expunges any + messages. + + Command continuation request responses use the token "+" instead of a + tag. These responses are sent by the server to indicate acceptance + of an incomplete client command and readiness for the remainder of + the command. + +7.1. Server Responses - Status Responses + + Status responses are OK, NO, BAD, PREAUTH and BYE. OK, NO, and BAD + may be tagged or untagged. PREAUTH and BYE are always untagged. + + Status responses MAY include an OPTIONAL "response code". A response + code consists of data inside square brackets in the form of an atom, + possibly followed by a space and arguments. The response code + + + +Crispin Standards Track [Page 49] + +RFC 2060 IMAP4rev1 December 1996 + + + contains additional information or status codes for client software + beyond the OK/NO/BAD condition, and are defined when there is a + specific action that a client can take based upon the additional + information. + + The currently defined response codes are: + + ALERT The human-readable text contains a special alert + that MUST be presented to the user in a fashion + that calls the user's attention to the message. + + NEWNAME Followed by a mailbox name and a new mailbox name. + A SELECT or EXAMINE is failing because the target + mailbox name no longer exists because it was + renamed to the new mailbox name. This is a hint to + the client that the operation can succeed if the + SELECT or EXAMINE is reissued with the new mailbox + name. + + PARSE The human-readable text represents an error in + parsing the [RFC-822] header or [MIME-IMB] headers + of a message in the mailbox. + + PERMANENTFLAGS Followed by a parenthesized list of flags, + indicates which of the known flags that the client + can change permanently. 
Any flags that are in the + FLAGS untagged response, but not the PERMANENTFLAGS + list, can not be set permanently. If the client + attempts to STORE a flag that is not in the + PERMANENTFLAGS list, the server will either reject + it with a NO reply or store the state for the + remainder of the current session only. The + PERMANENTFLAGS list can also include the special + flag \*, which indicates that it is possible to + create new keywords by attempting to store those + flags in the mailbox. + + READ-ONLY The mailbox is selected read-only, or its access + while selected has changed from read-write to + read-only. + + READ-WRITE The mailbox is selected read-write, or its access + while selected has changed from read-only to + read-write. + + + + + + + +Crispin Standards Track [Page 50] + +RFC 2060 IMAP4rev1 December 1996 + + + TRYCREATE An APPEND or COPY attempt is failing because the + target mailbox does not exist (as opposed to some + other reason). This is a hint to the client that + the operation can succeed if the mailbox is first + created by the CREATE command. + + UIDVALIDITY Followed by a decimal number, indicates the unique + identifier validity value. + + UNSEEN Followed by a decimal number, indicates the number + of the first message without the \Seen flag set. + + Additional response codes defined by particular client or server + implementations SHOULD be prefixed with an "X" until they are + added to a revision of this protocol. Client implementations + SHOULD ignore response codes that they do not recognize. + +7.1.1. OK Response + + Contents: OPTIONAL response code + human-readable text + + The OK response indicates an information message from the server. + When tagged, it indicates successful completion of the associated + command. The human-readable text MAY be presented to the user as + an information message. The untagged form indicates an + information-only message; the nature of the information MAY be + indicated by a response code. 
+ + The untagged form is also used as one of three possible greetings + at connection startup. It indicates that the connection is not + yet authenticated and that a LOGIN command is needed. + + Example: S: * OK IMAP4rev1 server ready + C: A001 LOGIN fred blurdybloop + S: * OK [ALERT] System shutdown in 10 minutes + S: A001 OK LOGIN Completed + +7.1.2. NO Response + + Contents: OPTIONAL response code + human-readable text + + The NO response indicates an operational error message from the + server. When tagged, it indicates unsuccessful completion of the + associated command. The untagged form indicates a warning; the + command can still complete successfully. The human-readable text + describes the condition. + + + +Crispin Standards Track [Page 51] + +RFC 2060 IMAP4rev1 December 1996 + + + Example: C: A222 COPY 1:2 owatagusiam + S: * NO Disk is 98% full, please delete unnecessary data + S: A222 OK COPY completed + C: A223 COPY 3:200 blurdybloop + S: * NO Disk is 98% full, please delete unnecessary data + S: * NO Disk is 99% full, please delete unnecessary data + S: A223 NO COPY failed: disk is full + +7.1.3. BAD Response + + Contents: OPTIONAL response code + human-readable text + + The BAD response indicates an error message from the server. When + tagged, it reports a protocol-level error in the client's command; + the tag indicates the command that caused the error. The untagged + form indicates a protocol-level error for which the associated + command can not be determined; it can also indicate an internal + server failure. The human-readable text describes the condition. + + Example: C: ...very long command line... + S: * BAD Command line too long + C: ...empty line... + S: * BAD Empty command line + C: A443 EXPUNGE + S: * BAD Disk crash, attempting salvage to a new disk! + S: * OK Salvage successful, no data lost + S: A443 OK Expunge completed + +7.1.4. 
PREAUTH Response + + Contents: OPTIONAL response code + human-readable text + + The PREAUTH response is always untagged, and is one of three + possible greetings at connection startup. It indicates that the + connection has already been authenticated by external means and + thus no LOGIN command is needed. + + Example: S: * PREAUTH IMAP4rev1 server logged in as Smith + +7.1.5. BYE Response + + Contents: OPTIONAL response code + human-readable text + + + + + + +Crispin Standards Track [Page 52] + +RFC 2060 IMAP4rev1 December 1996 + + + The BYE response is always untagged, and indicates that the server + is about to close the connection. The human-readable text MAY be + displayed to the user in a status report by the client. The BYE + response is sent under one of four conditions: + + 1) as part of a normal logout sequence. The server will close + the connection after sending the tagged OK response to the + LOGOUT command. + + 2) as a panic shutdown announcement. The server closes the + connection immediately. + + 3) as an announcement of an inactivity autologout. The server + closes the connection immediately. + + 4) as one of three possible greetings at connection startup, + indicating that the server is not willing to accept a + connection from this client. The server closes the + connection immediately. + + The difference between a BYE that occurs as part of a normal + LOGOUT sequence (the first case) and a BYE that occurs because of + a failure (the other three cases) is that the connection closes + immediately in the failure case. + + Example: S: * BYE Autologout; idle for too long + +7.2. Server Responses - Server and Mailbox Status + + These responses are always untagged. This is how server and mailbox + status data are transmitted from the server to the client. Many of + these responses typically result from a command with the same name. + +7.2.1. 
CAPABILITY Response + + Contents: capability listing + + The CAPABILITY response occurs as a result of a CAPABILITY + command. The capability listing contains a space-separated + listing of capability names that the server supports. The + capability listing MUST include the atom "IMAP4rev1". + + A capability name which begins with "AUTH=" indicates that the + server supports that particular authentication mechanism. + + + + + + + +Crispin Standards Track [Page 53] + +RFC 2060 IMAP4rev1 December 1996 + + + Other capability names indicate that the server supports an + extension, revision, or amendment to the IMAP4rev1 protocol. + Server responses MUST conform to this document until the client + issues a command that uses the associated capability. + + Capability names MUST either begin with "X" or be standard or + standards-track IMAP4rev1 extensions, revisions, or amendments + registered with IANA. A server MUST NOT offer unregistered or + non-standard capability names, unless such names are prefixed with + an "X". + + Client implementations SHOULD NOT require any capability name + other than "IMAP4rev1", and MUST ignore any unknown capability + names. + + Example: S: * CAPABILITY IMAP4rev1 AUTH=KERBEROS_V4 XPIG-LATIN + +7.2.2. LIST Response + + Contents: name attributes + hierarchy delimiter + name + + The LIST response occurs as a result of a LIST command. It + returns a single name that matches the LIST specification. There + can be multiple LIST responses for a single LIST command. + + Four name attributes are defined: + + \Noinferiors It is not possible for any child levels of + hierarchy to exist under this name; no child levels + exist now and none can be created in the future. + + \Noselect It is not possible to use this name as a selectable + mailbox. + + \Marked The mailbox has been marked "interesting" by the + server; the mailbox probably contains messages that + have been added since the last time the mailbox was + selected. 
+ + \Unmarked The mailbox does not contain any additional + messages since the last time the mailbox was + selected. + + If it is not feasible for the server to determine whether the + mailbox is "interesting" or not, or if the name is a \Noselect + name, the server SHOULD NOT send either \Marked or \Unmarked. + + + +Crispin Standards Track [Page 54] + +RFC 2060 IMAP4rev1 December 1996 + + + The hierarchy delimiter is a character used to delimit levels of + hierarchy in a mailbox name. A client can use it to create child + mailboxes, and to search higher or lower levels of naming + hierarchy. All children of a top-level hierarchy node MUST use + the same separator character. A NIL hierarchy delimiter means + that no hierarchy exists; the name is a "flat" name. + + The name represents an unambiguous left-to-right hierarchy, and + MUST be valid for use as a reference in LIST and LSUB commands. + Unless \Noselect is indicated, the name MUST also be valid as an + argument for commands, such as SELECT, that accept mailbox + names. + + Example: S: * LIST (\Noselect) "/" ~/Mail/foo + +7.2.3. LSUB Response + + Contents: name attributes + hierarchy delimiter + name + + The LSUB response occurs as a result of an LSUB command. It + returns a single name that matches the LSUB specification. There + can be multiple LSUB responses for a single LSUB command. The + data is identical in format to the LIST response. + + Example: S: * LSUB () "." #news.comp.mail.misc + +7.2.4. STATUS Response + + Contents: name + status parenthesized list + + The STATUS response occurs as a result of a STATUS command. It + returns the mailbox name that matches the STATUS specification and + the requested mailbox status information. + + Example: S: * STATUS blurdybloop (MESSAGES 231 UIDNEXT 44292) + +7.2.5.
SEARCH Response + + Contents: zero or more numbers + + + + + + + + + +Crispin Standards Track [Page 55] + +RFC 2060 IMAP4rev1 December 1996 + + + The SEARCH response occurs as a result of a SEARCH or UID SEARCH + command. The number(s) refer to those messages that match the + search criteria. For SEARCH, these are message sequence numbers; + for UID SEARCH, these are unique identifiers. Each number is + delimited by a space. + + Example: S: * SEARCH 2 3 6 + +7.2.6. FLAGS Response + + Contents: flag parenthesized list + + The FLAGS response occurs as a result of a SELECT or EXAMINE + command. The flag parenthesized list identifies the flags (at a + minimum, the system-defined flags) that are applicable for this + mailbox. Flags other than the system flags can also exist, + depending on server implementation. + + The update from the FLAGS response MUST be recorded by the client. + + Example: S: * FLAGS (\Answered \Flagged \Deleted \Seen \Draft) + +7.3. Server Responses - Mailbox Size + + These responses are always untagged. This is how changes in the size + of the mailbox are transmitted from the server to the client. + Immediately following the "*" token is a number that represents a + message count. + +7.3.1. EXISTS Response + + Contents: none + + The EXISTS response reports the number of messages in the mailbox. + This response occurs as a result of a SELECT or EXAMINE command, + and if the size of the mailbox changes (e.g. new mail). + + The update from the EXISTS response MUST be recorded by the + client. + + Example: S: * 23 EXISTS + + + + + + + + + + +Crispin Standards Track [Page 56] + +RFC 2060 IMAP4rev1 December 1996 + + +7.3.2. RECENT Response + + Contents: none + + The RECENT response reports the number of messages with the + \Recent flag set. This response occurs as a result of a SELECT or + EXAMINE command, and if the size of the mailbox changes (e.g. new + mail).
+ + Note: It is not guaranteed that the message sequence numbers of + recent messages will be a contiguous range of the highest n + messages in the mailbox (where n is the value reported by the + RECENT response). Examples of situations in which this is not + the case are: multiple clients having the same mailbox open + (the first session to be notified will see it as recent, others + will probably see it as non-recent), and when the mailbox is + re-ordered by a non-IMAP agent. + + The only reliable way to identify recent messages is to look at + message flags to see which have the \Recent flag set, or to do + a SEARCH RECENT. + + The update from the RECENT response MUST be recorded by the + client. + + Example: S: * 5 RECENT + +7.4. Server Responses - Message Status + + These responses are always untagged. This is how message data are + transmitted from the server to the client, often as a result of a + command with the same name. Immediately following the "*" token is a + number that represents a message sequence number. + +7.4.1. EXPUNGE Response + + Contents: none + + The EXPUNGE response reports that the specified message sequence + number has been permanently removed from the mailbox. The message + sequence number for each successive message in the mailbox is + immediately decremented by 1, and this decrement is reflected in + message sequence numbers in subsequent responses (including other + untagged EXPUNGE responses). + + As a result of the immediate decrement rule, message sequence + numbers that appear in a set of successive EXPUNGE responses + depend upon whether the messages are removed starting from lower + + + +Crispin Standards Track [Page 57] + +RFC 2060 IMAP4rev1 December 1996 + + + numbers to higher numbers, or from higher numbers to lower + numbers. 
For example, if the last 5 messages in a 9-message + mailbox are expunged, a "lower to higher" server will send five + untagged EXPUNGE responses for message sequence number 5, whereas + a "higher to lower" server will send successive untagged EXPUNGE + responses for message sequence numbers 9, 8, 7, 6, and 5. + + An EXPUNGE response MUST NOT be sent when no command is in + progress; nor while responding to a FETCH, STORE, or SEARCH + command. This rule is necessary to prevent a loss of + synchronization of message sequence numbers between client and + server. + + The update from the EXPUNGE response MUST be recorded by the + client. + + Example: S: * 44 EXPUNGE + +7.4.2. FETCH Response + + Contents: message data + + The FETCH response returns data about a message to the client. + The data are pairs of data item names and their values in + parentheses. This response occurs as the result of a FETCH or + STORE command, as well as by unilateral server decision (e.g. flag + updates). + + The current data items are: + + BODY A form of BODYSTRUCTURE without extension data. + + BODY[
<section>]<<origin_octet>> + A string expressing the body contents of the + specified section. The string SHOULD be + interpreted by the client according to the content + transfer encoding, body type, and subtype. + + If the origin octet is specified, this string is a + substring of the entire body contents, starting at + that origin octet. This means that BODY[]<0> MAY + be truncated, but BODY[] is NEVER truncated. + + 8-bit textual data is permitted if a [CHARSET] + identifier is part of the body parameter + parenthesized list for this section. Note that + headers (part specifiers HEADER or MIME, or the + header portion of a MESSAGE/RFC822 part), MUST be + + + +Crispin Standards Track [Page 58] + +RFC 2060 IMAP4rev1 December 1996 + + + 7-bit; 8-bit characters are not permitted in + headers. Note also that the blank line at the end + of the header is always included in header data. + + Non-textual data such as binary data MUST be + transfer encoded into a textual form such as BASE64 + prior to being sent to the client. To derive the + original binary data, the client MUST decode the + transfer encoded string. + + BODYSTRUCTURE A parenthesized list that describes the [MIME-IMB] + body structure of a message. This is computed by + the server by parsing the [MIME-IMB] header fields, + defaulting various fields as necessary. + + For example, a simple text message of 48 lines and + 2279 octets can have a body structure of: ("TEXT" + "PLAIN" ("CHARSET" "US-ASCII") NIL NIL "7BIT" 2279 + 48) + + Multiple parts are indicated by parenthesis + nesting. Instead of a body type as the first + element of the parenthesized list there is a nested + body. The second element of the parenthesized list + is the multipart subtype (mixed, digest, parallel, + alternative, etc.).
+ + For example, a two part message consisting of a + text and a BASE64-encoded text attachment can have + a body structure of: (("TEXT" "PLAIN" ("CHARSET" + "US-ASCII") NIL NIL "7BIT" 1152 23)("TEXT" "PLAIN" + ("CHARSET" "US-ASCII" "NAME" "cc.diff") + "<960723163407.20117h@cac.washington.edu>" + "Compiler diff" "BASE64" 4554 73) "MIXED") + + Extension data follows the multipart subtype. + Extension data is never returned with the BODY + fetch, but can be returned with a BODYSTRUCTURE + fetch. Extension data, if present, MUST be in the + defined order. + + The extension data of a multipart body part are in + the following order: + + body parameter parenthesized list + A parenthesized list of attribute/value pairs + [e.g. ("foo" "bar" "baz" "rag") where "bar" is + the value of "foo" and "rag" is the value of + + + +Crispin Standards Track [Page 59] + +RFC 2060 IMAP4rev1 December 1996 + + + "baz"] as defined in [MIME-IMB]. + + body disposition + A parenthesized list, consisting of a + disposition type string followed by a + parenthesized list of disposition + attribute/value pairs. The disposition type and + attribute names will be defined in a future + standards-track revision to [DISPOSITION]. + + body language + A string or parenthesized list giving the body + language value as defined in [LANGUAGE-TAGS]. + + Any following extension data are not yet defined in + this version of the protocol. Such extension data + can consist of zero or more NILs, strings, numbers, + or potentially nested parenthesized lists of such + data. Client implementations that do a + BODYSTRUCTURE fetch MUST be prepared to accept such + extension data. Server implementations MUST NOT + send such extension data until it has been defined + by a revision of this protocol. + + The basic fields of a non-multipart body part are + in the following order: + + body type + A string giving the content media type name as + defined in [MIME-IMB].
+ + body subtype + A string giving the content subtype name as + defined in [MIME-IMB]. + + body parameter parenthesized list + A parenthesized list of attribute/value pairs + [e.g. ("foo" "bar" "baz" "rag") where "bar" is + the value of "foo" and "rag" is the value of + "baz"] as defined in [MIME-IMB]. + + body id + A string giving the content id as defined in + [MIME-IMB]. + + body description + A string giving the content description as + defined in [MIME-IMB]. + + + +Crispin Standards Track [Page 60] + +RFC 2060 IMAP4rev1 December 1996 + + + body encoding + A string giving the content transfer encoding as + defined in [MIME-IMB]. + + body size + A number giving the size of the body in octets. + Note that this size is the size in its transfer + encoding and not the resulting size after any + decoding. + + A body type of type MESSAGE and subtype RFC822 + contains, immediately after the basic fields, the + envelope structure, body structure, and size in + text lines of the encapsulated message. + + A body type of type TEXT contains, immediately + after the basic fields, the size of the body in + text lines. Note that this size is the size in its + content transfer encoding and not the resulting + size after any decoding. + + Extension data follows the basic fields and the + type-specific fields listed above. Extension data + is never returned with the BODY fetch, but can be + returned with a BODYSTRUCTURE fetch. Extension + data, if present, MUST be in the defined order. + + The extension data of a non-multipart body part are + in the following order: + + body MD5 + A string giving the body MD5 value as defined in + [MD5]. + + body disposition + A parenthesized list with the same content and + function as the body disposition for a multipart + body part. + + body language + A string or parenthesized list giving the body + language value as defined in [LANGUAGE-TAGS]. 
+ + Any following extension data are not yet defined in + this version of the protocol, and would be as + described above under multipart extension data. + + + + + +Crispin Standards Track [Page 61] + +RFC 2060 IMAP4rev1 December 1996 + + + ENVELOPE A parenthesized list that describes the envelope + structure of a message. This is computed by the + server by parsing the [RFC-822] header into the + component parts, defaulting various fields as + necessary. + + The fields of the envelope structure are in the + following order: date, subject, from, sender, + reply-to, to, cc, bcc, in-reply-to, and message-id. + The date, subject, in-reply-to, and message-id + fields are strings. The from, sender, reply-to, + to, cc, and bcc fields are parenthesized lists of + address structures. + + An address structure is a parenthesized list that + describes an electronic mail address. The fields + of an address structure are in the following order: + personal name, [SMTP] at-domain-list (source + route), mailbox name, and host name. + + [RFC-822] group syntax is indicated by a special + form of address structure in which the host name + field is NIL. If the mailbox name field is also + NIL, this is an end of group marker (semi-colon in + RFC 822 syntax). If the mailbox name field is + non-NIL, this is a start of group marker, and the + mailbox name field holds the group name phrase. + + Any field of an envelope or address structure that + is not applicable is presented as NIL. Note that + the server MUST default the reply-to and sender + fields from the from field; a client is not + expected to know to do this. + + FLAGS A parenthesized list of flags that are set for this + message. + + INTERNALDATE A string representing the internal date of the + message. + + RFC822 Equivalent to BODY[]. + + RFC822.HEADER Equivalent to BODY.PEEK[HEADER]. + + RFC822.SIZE A number expressing the [RFC-822] size of the + message. + + RFC822.TEXT Equivalent to BODY[TEXT]. 
+ + + +Crispin Standards Track [Page 62] + +RFC 2060 IMAP4rev1 December 1996 + + + UID A number expressing the unique identifier of the + message. + + + Example: S: * 23 FETCH (FLAGS (\Seen) RFC822.SIZE 44827) + +7.5. Server Responses - Command Continuation Request + + The command continuation request response is indicated by a "+" token + instead of a tag. This form of response indicates that the server is + ready to accept the continuation of a command from the client. The + remainder of this response is a line of text. + + This response is used in the AUTHENTICATE command to transmit server + data to the client, and request additional client data. This + response is also used if an argument to any command is a literal. + + The client is not permitted to send the octets of the literal unless + the server indicates that it expects it. This permits the server to + process commands and reject errors on a line-by-line basis. The + remainder of the command, including the CRLF that terminates a + command, follows the octets of the literal. If there are any + additional command arguments the literal octets are followed by a + space and those arguments. + + Example: C: A001 LOGIN {11} + S: + Ready for additional command text + C: FRED FOOBAR {7} + S: + Ready for additional command text + C: fat man + S: A001 OK LOGIN completed + C: A044 BLURDYBLOOP {102856} + S: A044 BAD No such command as "BLURDYBLOOP" + +8. Sample IMAP4rev1 connection + + The following is a transcript of an IMAP4rev1 connection. A long + line in this sample is broken for editorial clarity.
+ +S: * OK IMAP4rev1 Service Ready +C: a001 login mrc secret +S: a001 OK LOGIN completed +C: a002 select inbox +S: * 18 EXISTS +S: * FLAGS (\Answered \Flagged \Deleted \Seen \Draft) +S: * 2 RECENT +S: * OK [UNSEEN 17] Message 17 is the first unseen message +S: * OK [UIDVALIDITY 3857529045] UIDs valid + + + +Crispin Standards Track [Page 63] + +RFC 2060 IMAP4rev1 December 1996 + + +S: a002 OK [READ-WRITE] SELECT completed +C: a003 fetch 12 full +S: * 12 FETCH (FLAGS (\Seen) INTERNALDATE "17-Jul-1996 02:44:25 -0700" + RFC822.SIZE 4286 ENVELOPE ("Wed, 17 Jul 1996 02:23:25 -0700 (PDT)" + "IMAP4rev1 WG mtg summary and minutes" + (("Terry Gray" NIL "gray" "cac.washington.edu")) + (("Terry Gray" NIL "gray" "cac.washington.edu")) + (("Terry Gray" NIL "gray" "cac.washington.edu")) + ((NIL NIL "imap" "cac.washington.edu")) + ((NIL NIL "minutes" "CNRI.Reston.VA.US") + ("John Klensin" NIL "KLENSIN" "INFOODS.MIT.EDU")) NIL NIL + "<B27397-0100000@cac.washington.edu>") + BODY ("TEXT" "PLAIN" ("CHARSET" "US-ASCII") NIL NIL "7BIT" 3028 92)) +S: a003 OK FETCH completed +C: a004 fetch 12 body[header] +S: * 12 FETCH (BODY[HEADER] {350} +S: Date: Wed, 17 Jul 1996 02:23:25 -0700 (PDT) +S: From: Terry Gray <gray@cac.washington.edu> +S: Subject: IMAP4rev1 WG mtg summary and minutes +S: To: imap@cac.washington.edu +S: cc: minutes@CNRI.Reston.VA.US, John Klensin <KLENSIN@INFOODS.MIT.EDU> +S: Message-Id: <B27397-0100000@cac.washington.edu> +S: MIME-Version: 1.0 +S: Content-Type: TEXT/PLAIN; CHARSET=US-ASCII +S: +S: ) +S: a004 OK FETCH completed +C: a005 store 12 +flags \deleted +S: * 12 FETCH (FLAGS (\Seen \Deleted)) +S: a005 OK +FLAGS completed +C: a006 logout +S: * BYE IMAP4rev1 server terminating connection +S: a006 OK LOGOUT completed + +9. Formal Syntax + + The following syntax specification uses the augmented Backus-Naur + Form (BNF) notation as specified in [RFC-822] with one exception; the + delimiter used with the "#" construct is a single space (SPACE) and + not one or more commas.
+ + In the case of alternative or optional rules in which a later rule + overlaps an earlier rule, the rule which is listed earlier MUST take + priority. For example, "\Seen" when parsed as a flag is the \Seen + flag name and not a flag_extension, even though "\Seen" could be + parsed as a flag_extension. Some, but not all, instances of this + rule are noted below. + + + + +Crispin Standards Track [Page 64] + +RFC 2060 IMAP4rev1 December 1996 + + + Except as noted otherwise, all alphabetic characters are case- + insensitive. The use of upper or lower case characters to define + token strings is for editorial clarity only. Implementations MUST + accept these strings in a case-insensitive fashion. + +address ::= "(" addr_name SPACE addr_adl SPACE addr_mailbox + SPACE addr_host ")" + +addr_adl ::= nstring + ;; Holds route from [RFC-822] route-addr if + ;; non-NIL + +addr_host ::= nstring + ;; NIL indicates [RFC-822] group syntax. + ;; Otherwise, holds [RFC-822] domain name + +addr_mailbox ::= nstring + ;; NIL indicates end of [RFC-822] group; if + ;; non-NIL and addr_host is NIL, holds + ;; [RFC-822] group name. 
+ ;; Otherwise, holds [RFC-822] local-part + +addr_name ::= nstring + ;; Holds phrase from [RFC-822] mailbox if + ;; non-NIL + +alpha ::= "A" / "B" / "C" / "D" / "E" / "F" / "G" / "H" / + "I" / "J" / "K" / "L" / "M" / "N" / "O" / "P" / + "Q" / "R" / "S" / "T" / "U" / "V" / "W" / "X" / + "Y" / "Z" / + "a" / "b" / "c" / "d" / "e" / "f" / "g" / "h" / + "i" / "j" / "k" / "l" / "m" / "n" / "o" / "p" / + "q" / "r" / "s" / "t" / "u" / "v" / "w" / "x" / + "y" / "z" + ;; Case-sensitive + +append ::= "APPEND" SPACE mailbox [SPACE flag_list] + [SPACE date_time] SPACE literal + +astring ::= atom / string + +atom ::= 1*ATOM_CHAR + +ATOM_CHAR ::= <any CHAR except atom_specials> + +atom_specials ::= "(" / ")" / "{" / SPACE / CTL / list_wildcards / + quoted_specials + + + + +Crispin Standards Track [Page 65] + +RFC 2060 IMAP4rev1 December 1996 + + +authenticate ::= "AUTHENTICATE" SPACE auth_type *(CRLF base64) + +auth_type ::= atom + ;; Defined by [IMAP-AUTH] + +base64 ::= *(4base64_char) [base64_terminal] + +base64_char ::= alpha / digit / "+" / "/" + +base64_terminal ::= (2base64_char "==") / (3base64_char "=") + +body ::= "(" body_type_1part / body_type_mpart ")" + +body_extension ::= nstring / number / "(" 1#body_extension ")" + ;; Future expansion. Client implementations + ;; MUST accept body_extension fields. Server + ;; implementations MUST NOT generate + ;; body_extension fields except as defined by + ;; future standard or standards-track + ;; revisions of this specification.
+ +body_ext_1part ::= body_fld_md5 [SPACE body_fld_dsp + [SPACE body_fld_lang + [SPACE 1#body_extension]]] + ;; MUST NOT be returned on non-extensible + ;; "BODY" fetch + +body_ext_mpart ::= body_fld_param + [SPACE body_fld_dsp SPACE body_fld_lang + [SPACE 1#body_extension]] + ;; MUST NOT be returned on non-extensible + ;; "BODY" fetch + +body_fields ::= body_fld_param SPACE body_fld_id SPACE + body_fld_desc SPACE body_fld_enc SPACE + body_fld_octets + +body_fld_desc ::= nstring + +body_fld_dsp ::= "(" string SPACE body_fld_param ")" / nil + +body_fld_enc ::= (<"> ("7BIT" / "8BIT" / "BINARY" / "BASE64"/ + "QUOTED-PRINTABLE") <">) / string + +body_fld_id ::= nstring + +body_fld_lang ::= nstring / "(" 1#string ")" + + + + +Crispin Standards Track [Page 66] + +RFC 2060 IMAP4rev1 December 1996 + + +body_fld_lines ::= number + +body_fld_md5 ::= nstring + +body_fld_octets ::= number + +body_fld_param ::= "(" 1#(string SPACE string) ")" / nil + +body_type_1part ::= (body_type_basic / body_type_msg / body_type_text) + [SPACE body_ext_1part] + +body_type_basic ::= media_basic SPACE body_fields + ;; MESSAGE subtype MUST NOT be "RFC822" + +body_type_mpart ::= 1*body SPACE media_subtype + [SPACE body_ext_mpart] + +body_type_msg ::= media_message SPACE body_fields SPACE envelope + SPACE body SPACE body_fld_lines + +body_type_text ::= media_text SPACE body_fields SPACE body_fld_lines + +capability ::= "AUTH=" auth_type / atom + ;; New capabilities MUST begin with "X" or be + ;; registered with IANA as standard or + ;; standards-track + +capability_data ::= "CAPABILITY" SPACE [1#capability SPACE] "IMAP4rev1" + [SPACE 1#capability] + ;; IMAP4rev1 servers which offer RFC 1730 + ;; compatibility MUST list "IMAP4" as the first + ;; capability. 
+ +CHAR ::= <any 7-bit US-ASCII character except NUL, + 0x01 - 0x7f> + +CHAR8 ::= <any 8-bit octet except NUL, 0x01 - 0xff> + +command ::= tag SPACE (command_any / command_auth / + command_nonauth / command_select) CRLF + ;; Modal based on state + +command_any ::= "CAPABILITY" / "LOGOUT" / "NOOP" / x_command + ;; Valid in all states + +command_auth ::= append / create / delete / examine / list / lsub / + rename / select / status / subscribe / unsubscribe + ;; Valid only in Authenticated or Selected state + + + +Crispin Standards Track [Page 67] + +RFC 2060 IMAP4rev1 December 1996 + + +command_nonauth ::= login / authenticate + ;; Valid only when in Non-Authenticated state + +command_select ::= "CHECK" / "CLOSE" / "EXPUNGE" / + copy / fetch / store / uid / search + ;; Valid only when in Selected state + +continue_req ::= "+" SPACE (resp_text / base64) + +copy ::= "COPY" SPACE set SPACE mailbox + +CR ::= <ASCII CR, carriage return, 0x0D> + +create ::= "CREATE" SPACE mailbox + ;; Use of INBOX gives a NO error + +CRLF ::= CR LF + +CTL ::= <any ASCII control character and DEL, + 0x00 - 0x1f, 0x7f> + +date ::= date_text / <"> date_text <"> + +date_day ::= 1*2digit + ;; Day of month + +date_day_fixed ::= (SPACE digit) / 2digit + ;; Fixed-format version of date_day + +date_month ::= "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" / + "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec" + +date_text ::= date_day "-" date_month "-" date_year + +date_year ::= 4digit + +date_time ::= <"> date_day_fixed "-" date_month "-" date_year + SPACE time SPACE zone <"> + +delete ::= "DELETE" SPACE mailbox + ;; Use of INBOX gives a NO error + +digit ::= "0" / digit_nz + +digit_nz ::= "1" / "2" / "3" / "4" / "5" / "6" / "7" / "8" / + "9" + + + + + +Crispin Standards Track [Page 68] + +RFC 2060 IMAP4rev1 December 1996 + + +envelope ::= "(" env_date SPACE env_subject SPACE env_from + SPACE env_sender SPACE env_reply_to SPACE env_to + SPACE env_cc SPACE env_bcc SPACE env_in_reply_to + SPACE env_message_id ")" + +env_bcc ::= "(" 1*address ")" / nil + +env_cc ::= "(" 1*address ")" / nil + +env_date ::= nstring + +env_from ::= "(" 1*address ")" / nil + +env_in_reply_to ::= nstring +
+env_message_id ::= nstring + +env_reply_to ::= "(" 1*address ")" / nil + +env_sender ::= "(" 1*address ")" / nil + +env_subject ::= nstring + +env_to ::= "(" 1*address ")" / nil + +examine ::= "EXAMINE" SPACE mailbox + +fetch ::= "FETCH" SPACE set SPACE ("ALL" / "FULL" / + "FAST" / fetch_att / "(" 1#fetch_att ")") + +fetch_att ::= "ENVELOPE" / "FLAGS" / "INTERNALDATE" / + "RFC822" [".HEADER" / ".SIZE" / ".TEXT"] / + "BODY" ["STRUCTURE"] / "UID" / + "BODY" [".PEEK"] section + ["<" number "." nz_number ">"] + +flag ::= "\Answered" / "\Flagged" / "\Deleted" / + "\Seen" / "\Draft" / flag_keyword / flag_extension + +flag_extension ::= "\" atom + ;; Future expansion. Client implementations + ;; MUST accept flag_extension flags. Server + ;; implementations MUST NOT generate + ;; flag_extension flags except as defined by + ;; future standard or standards-track + ;; revisions of this specification. + +flag_keyword ::= atom + + + +Crispin Standards Track [Page 69] + +RFC 2060 IMAP4rev1 December 1996 + + +flag_list ::= "(" #flag ")" + +greeting ::= "*" SPACE (resp_cond_auth / resp_cond_bye) CRLF + +header_fld_name ::= astring + +header_list ::= "(" 1#header_fld_name ")" + +LF ::= <ASCII LF, line feed, 0x0A> + +list ::= "LIST" SPACE mailbox SPACE list_mailbox + +list_mailbox ::= 1*(ATOM_CHAR / list_wildcards) / string + +list_wildcards ::= "%" / "*" + +literal ::= "{" number "}" CRLF *CHAR8 + ;; Number represents the number of CHAR8 octets + +login ::= "LOGIN" SPACE userid SPACE password + +lsub ::= "LSUB" SPACE mailbox SPACE list_mailbox + +mailbox ::= "INBOX" / astring + ;; INBOX is case-insensitive. All case variants of + ;; INBOX (e.g. "iNbOx") MUST be interpreted as INBOX + ;; not as an astring. Refer to section 5.1 for + ;; further semantic details of mailbox names.
+ +mailbox_data ::= "FLAGS" SPACE flag_list / + "LIST" SPACE mailbox_list / + "LSUB" SPACE mailbox_list / + "MAILBOX" SPACE text / + "SEARCH" [SPACE 1#nz_number] / + "STATUS" SPACE mailbox SPACE + "(" #(status_att SPACE number) ")" / + nz_number SPACE "EXISTS" / nz_number SPACE "RECENT" + +mailbox_list ::= "(" #("\Marked" / "\Noinferiors" / + "\Noselect" / "\Unmarked" / flag_extension) ")" + SPACE (<"> QUOTED_CHAR <"> / nil) SPACE mailbox + +media_basic ::= ((<"> ("APPLICATION" / "AUDIO" / "IMAGE" / + "MESSAGE" / "VIDEO") <">) / string) + SPACE media_subtype + ;; Defined in [MIME-IMT] + +media_message ::= <"> "MESSAGE" <"> SPACE <"> "RFC822" <"> + + + +Crispin Standards Track [Page 70] + +RFC 2060 IMAP4rev1 December 1996 + + + ;; Defined in [MIME-IMT] + +media_subtype ::= string + ;; Defined in [MIME-IMT] + +media_text ::= <"> "TEXT" <"> SPACE media_subtype + ;; Defined in [MIME-IMT] + +message_data ::= nz_number SPACE ("EXPUNGE" / + ("FETCH" SPACE msg_att)) + +msg_att ::= "(" 1#("ENVELOPE" SPACE envelope / + "FLAGS" SPACE "(" #(flag / "\Recent") ")" / + "INTERNALDATE" SPACE date_time / + "RFC822" [".HEADER" / ".TEXT"] SPACE nstring / + "RFC822.SIZE" SPACE number / + "BODY" ["STRUCTURE"] SPACE body / + "BODY" section ["<" number ">"] SPACE nstring / + "UID" SPACE uniqueid) ")" + +nil ::= "NIL" + +nstring ::= string / nil + +number ::= 1*digit + ;; Unsigned 32-bit integer + ;; (0 <= n < 4,294,967,296) + +nz_number ::= digit_nz *digit + ;; Non-zero unsigned 32-bit integer + ;; (0 < n < 4,294,967,296) + +password ::= astring + +quoted ::= <"> *QUOTED_CHAR <"> + +QUOTED_CHAR ::= <any TEXT_CHAR except quoted_specials> / + "\" quoted_specials + +quoted_specials ::= <"> / "\" + +rename ::= "RENAME" SPACE mailbox SPACE mailbox + ;; Use of INBOX as a destination gives a NO error + +response ::= *(continue_req / response_data) response_done + +response_data ::= "*" SPACE (resp_cond_state / resp_cond_bye / + mailbox_data / message_data / capability_data) + + + +Crispin Standards Track [Page 71] + +RFC 2060 IMAP4rev1 December 1996 + + + CRLF + +response_done ::= response_tagged / response_fatal + +response_fatal ::= "*" SPACE resp_cond_bye CRLF + ;; Server closes connection immediately + +response_tagged ::= tag
SPACE resp_cond_state CRLF + +resp_cond_auth ::= ("OK" / "PREAUTH") SPACE resp_text + ;; Authentication condition + +resp_cond_bye ::= "BYE" SPACE resp_text + +resp_cond_state ::= ("OK" / "NO" / "BAD") SPACE resp_text + ;; Status condition + +resp_text ::= ["[" resp_text_code "]" SPACE] (text_mime2 / text) + ;; text SHOULD NOT begin with "[" or "=" + +resp_text_code ::= "ALERT" / "PARSE" / + "PERMANENTFLAGS" SPACE "(" #(flag / "\*") ")" / + "READ-ONLY" / "READ-WRITE" / "TRYCREATE" / + "UIDVALIDITY" SPACE nz_number / + "UNSEEN" SPACE nz_number / + atom [SPACE 1*<any TEXT_CHAR except "]">] + +search ::= "SEARCH" SPACE ["CHARSET" SPACE astring SPACE] + 1#search_key + ;; [CHARSET] MUST be registered with IANA + +search_key ::= "ALL" / "ANSWERED" / "BCC" SPACE astring / + "BEFORE" SPACE date / "BODY" SPACE astring / + "CC" SPACE astring / "DELETED" / "FLAGGED" / + "FROM" SPACE astring / + "KEYWORD" SPACE flag_keyword / "NEW" / "OLD" / + "ON" SPACE date / "RECENT" / "SEEN" / + "SINCE" SPACE date / "SUBJECT" SPACE astring / + "TEXT" SPACE astring / "TO" SPACE astring / + "UNANSWERED" / "UNDELETED" / "UNFLAGGED" / + "UNKEYWORD" SPACE flag_keyword / "UNSEEN" / + ;; Above this line were in [IMAP2] + "DRAFT" / + "HEADER" SPACE header_fld_name SPACE astring / + "LARGER" SPACE number / "NOT" SPACE search_key / + "OR" SPACE search_key SPACE search_key / + "SENTBEFORE" SPACE date / "SENTON" SPACE date / + "SENTSINCE" SPACE date / "SMALLER" SPACE number / + + + +Crispin Standards Track [Page 72] + +RFC 2060 IMAP4rev1 December 1996 + + + "UID" SPACE set / "UNDRAFT" / set / + "(" 1#search_key ")" + +section ::= "[" [section_text / (nz_number *["." nz_number] + ["." (section_text / "MIME")])] "]" + +section_text ::= "HEADER" / "HEADER.FIELDS" [".NOT"] + SPACE header_list / "TEXT" + +select ::= "SELECT" SPACE mailbox + +sequence_num ::= nz_number / "*" + ;; * is the largest number in use. For message + ;; sequence numbers, it is the number of messages + ;; in the mailbox.
For unique identifiers, it is + ;; the unique identifier of the last message in + ;; the mailbox. + +set ::= sequence_num / (sequence_num ":" sequence_num) / + (set "," set) + ;; Identifies a set of messages. For message + ;; sequence numbers, these are consecutive + ;; numbers from 1 to the number of messages in + ;; the mailbox + ;; Comma delimits individual numbers, colon + ;; delimits between two numbers inclusive. + ;; Example: 2,4:7,9,12:* is 2,4,5,6,7,9,12,13, + ;; 14,15 for a mailbox with 15 messages. + +SPACE ::= <ASCII SP, space, 0x20> + +status ::= "STATUS" SPACE mailbox SPACE "(" 1#status_att ")" + +status_att ::= "MESSAGES" / "RECENT" / "UIDNEXT" / "UIDVALIDITY" / + "UNSEEN" + +store ::= "STORE" SPACE set SPACE store_att_flags + +store_att_flags ::= (["+" / "-"] "FLAGS" [".SILENT"]) SPACE + (flag_list / #flag) + +string ::= quoted / literal + +subscribe ::= "SUBSCRIBE" SPACE mailbox + +tag ::= 1*<any ATOM_CHAR except "+"> + +text ::= 1*TEXT_CHAR + + + +Crispin Standards Track [Page 73] + +RFC 2060 IMAP4rev1 December 1996 + + +text_mime2 ::= "=?" <charset> "?" <encoding> "?" + <encoded-text> "?=" + ;; Syntax defined in [MIME-HDRS] + +TEXT_CHAR ::= <any CHAR except CR and LF> + +time ::= 2digit ":" 2digit ":" 2digit + ;; Hours minutes seconds + +uid ::= "UID" SPACE (copy / fetch / search / store) + ;; Unique identifiers used instead of message + ;; sequence numbers + +uniqueid ::= nz_number + ;; Strictly ascending + +unsubscribe ::= "UNSUBSCRIBE" SPACE mailbox + +userid ::= astring + +x_command ::= "X" atom + +zone ::= ("+" / "-") 4digit + ;; Signed four-digit value of hhmm representing + ;; hours and minutes east of Greenwich (that is, + ;; the amount that the given time differs from + ;; Universal Time). Subtracting the timezone + ;; from the given time will give the UT form. + ;; The Universal Time zone is "+0000". + +10. Author's Note + + This document is a revision or rewrite of earlier documents, and + supersedes the protocol specification in those documents: RFC 1730, + unpublished IMAP2bis.TXT document, RFC 1176, and RFC 1064. + +11.
Security Considerations + + IMAP4rev1 protocol transactions, including electronic mail data, are + sent in the clear over the network unless privacy protection is + negotiated in the AUTHENTICATE command. + + A server error message for an AUTHENTICATE command which fails due to + invalid credentials SHOULD NOT detail why the credentials are + invalid. + + Use of the LOGIN command sends passwords in the clear. This can be + avoided by using the AUTHENTICATE command instead. + + + +Crispin Standards Track [Page 74] + +RFC 2060 IMAP4rev1 December 1996 + + + A server error message for a failing LOGIN command SHOULD NOT specify + that the user name, as opposed to the password, is invalid. + + Additional security considerations are discussed in the section + discussing the AUTHENTICATE and LOGIN commands. + +12. Author's Address + + Mark R. Crispin + Networks and Distributed Computing + University of Washington + 4545 15th Avenue NE + Seattle, WA 98105-4527 + + Phone: (206) 543-5762 + + EMail: MRC@CAC.Washington.EDU + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Crispin Standards Track [Page 75] + +RFC 2060 IMAP4rev1 December 1996 + + +Appendices + +A. References + +[ACAP] Myers, J. "ACAP -- Application Configuration Access Protocol", +Work in Progress. + +[CHARSET] Reynolds, J., and J. Postel, "Assigned Numbers", STD 2, +RFC 1700, USC/Information Sciences Institute, October 1994. + +[DISPOSITION] Troost, R., and Dorner, S., "Communicating Presentation +Information in Internet Messages: The Content-Disposition Header", +RFC 1806, June 1995. + +[IMAP-AUTH] Myers, J., "IMAP4 Authentication Mechanism", RFC 1731. +Carnegie-Mellon University, December 1994. + +[IMAP-COMPAT] Crispin, M., "IMAP4 Compatibility with IMAP2bis", RFC +2061, University of Washington, November 1996. + +[IMAP-DISC] Austein, R., "Synchronization Operations for Disconnected +IMAP4 Clients", Work in Progress. + +[IMAP-HISTORICAL] Crispin, M.
"IMAP4 Compatibility with IMAP2 and +IMAP2bis", RFC 1732, University of Washington, December 1994. + +[IMAP-MODEL] Crispin, M., "Distributed Electronic Mail Models in +IMAP4", RFC 1733, University of Washington, December 1994. + +[IMAP-OBSOLETE] Crispin, M., "Internet Message Access Protocol - +Obsolete Syntax", RFC 2062, University of Washington, November 1996. + +[IMAP2] Crispin, M., "Interactive Mail Access Protocol - Version 2", +RFC 1176, University of Washington, August 1990. + +[LANGUAGE-TAGS] Alvestrand, H., "Tags for the Identification of +Languages", RFC 1766, March 1995. + +[MD5] Myers, J., and M. Rose, "The Content-MD5 Header Field", RFC +1864, October 1995. + +[MIME-IMB] Freed, N., and N. Borenstein, "MIME (Multipurpose Internet +Mail Extensions) Part One: Format of Internet Message Bodies", RFC +2045, November 1996. + +[MIME-IMT] Freed, N., and N. Borenstein, "MIME (Multipurpose +Internet Mail Extensions) Part Two: Media Types", RFC 2046, +November 1996. + + + +Crispin Standards Track [Page 76] + +RFC 2060 IMAP4rev1 December 1996 + + +[MIME-HDRS] Moore, K., "MIME (Multipurpose Internet Mail Extensions) +Part Three: Message Header Extensions for Non-ASCII Text", RFC +2047, November 1996. + +[RFC-822] Crocker, D., "Standard for the Format of ARPA Internet Text +Messages", STD 11, RFC 822, University of Delaware, August 1982. + +[SMTP] Postel, J., "Simple Mail Transfer Protocol", STD 10, +RFC 821, USC/Information Sciences Institute, August 1982. + +[UTF-7] Goldsmith, D., and Davis, M., "UTF-7: A Mail-Safe +Transformation Format of Unicode", RFC 1642, July 1994. + +B. Changes from RFC 1730 + +1) The STATUS command has been added. + +2) Clarify in the formal syntax that the "#" construct can never +refer to multiple spaces. + +3) Obsolete syntax has been moved to a separate document. + +4) The PARTIAL command has been obsoleted. + +5) The RFC822.HEADER.LINES, RFC822.HEADER.LINES.NOT, RFC822.PEEK, and +RFC822.TEXT.PEEK fetch attributes have been obsoleted. 
+ +6) The "<" origin "." size ">" suffix for BODY text attributes has +been added. + +7) The HEADER, HEADER.FIELDS, HEADER.FIELDS.NOT, MIME, and TEXT part +specifiers have been added. + +8) Support for Content-Disposition and Content-Language has been +added. + +9) The restriction on fetching nested MULTIPART parts has been +removed. + +10) Body part number 0 has been obsoleted. + +11) Server-supported authenticators are now identified by +capabilities. + + + + + + + + +Crispin Standards Track [Page 77] + +RFC 2060 IMAP4rev1 December 1996 + + +12) The capability that identifies this protocol is now called +"IMAP4rev1". A server that provides backwards support for RFC 1730 +SHOULD emit the "IMAP4" capability in addition to "IMAP4rev1" in its +CAPABILITY response. Because RFC-1730 required "IMAP4" to appear as +the first capability, it MUST be listed first in the response. + +13) A description of the mailbox name namespace convention has been +added. + +14) A description of the international mailbox name convention has +been added. + +15) The UID-NEXT and UID-VALIDITY status items are now called UIDNEXT +and UIDVALIDITY. This is a change from the IMAP STATUS +Work in Progress and not from RFC-1730. + +16) Add a clarification that a null mailbox name argument to the LIST +command returns an untagged LIST response with the hierarchy +delimiter and root of the reference argument. + +17) Define terms such as "MUST", "SHOULD", and "MUST NOT". + +18) Add a section which defines message attributes and more +thoroughly details the semantics of message sequence numbers, UIDs, +and flags. + +19) Add a clarification detailing the circumstances when a client may +send multiple commands without waiting for a response, and the +circumstances in which ambiguities may result. + +20) Add a recommendation on server behavior for DELETE and RENAME +when inferior hierarchical names of the given name exist.
+ +21) Add a clarification that a mailbox name may not be unilaterally +unsubscribed by the server, even if that mailbox name no longer +exists. + +22) Add a clarification that LIST should return its results quickly +without undue delay. + +23) Add a clarification that the date_time argument to APPEND sets +the internal date of the message. + +24) Add a clarification on APPEND behavior when the target mailbox is +the currently selected mailbox. + + + + + + +Crispin Standards Track [Page 78] + +RFC 2060 IMAP4rev1 December 1996 + + +25) Add a clarification that external changes to flags should be +always announced via an untagged FETCH even if the current command is +a STORE with the ".SILENT" suffix. + +26) Add a clarification that COPY appends to the target mailbox. + +27) Add the NEWNAME response code. + +28) Rewrite the description of the untagged BYE response to clarify +its semantics. + +29) Change the reference for the body MD5 to refer to the proper RFC. + +30) Clarify that the formal syntax contains rules which may overlap, +and that in the event of such an overlap the rule which occurs first +takes precedence. + +31) Correct the definition of body_fld_param. + +32) More formal syntax for capability_data. + +33) Clarify that any case variant of "INBOX" must be interpreted as +INBOX. + +34) Clarify that the human-readable text in resp_text should not +begin with "[" or "=". + +35) Change MIME references to Draft Standard documents. + +36) Clarify \Recent semantics. + +37) Additional examples. + +C. Key Word Index + + +FLAGS <flag list> (store command data item) ............... 45 + +FLAGS.SILENT <flag list> (store command data item) ........ 46 + -FLAGS <flag list> (store command data item) ............... 46 + -FLAGS.SILENT <flag list> (store command data item) ........ 46 + ALERT (response code) ...................................... 50 + ALL (fetch item) ........................................... 41 + ALL (search key) ........................................... 
38 + ANSWERED (search key) ...................................... 38 + APPEND (command) ........................................... 34 + AUTHENTICATE (command) ..................................... 20 + BAD (response) ............................................. 52 + BCC <string> (search key) .................................. 38 + BEFORE <date> (search key) ................................. 39 + + + +Crispin Standards Track [Page 79] + +RFC 2060 IMAP4rev1 December 1996 + + + BODY (fetch item) .......................................... 41 + BODY (fetch result) ........................................ 58 + BODY <string> (search key) ................................. 39 + BODY.PEEK[<section>]<<partial>> (fetch item) ............... 44 + BODYSTRUCTURE (fetch item) ................................. 44 + BODYSTRUCTURE (fetch result) ............................... 59 + BODY[<section>]<<origin_octet>> (fetch result) ............. 58 + BODY[<section>]<<partial>> (fetch item) .................... 41 + BYE (response) ............................................. 52 + Body Structure (message attribute) ......................... 11 + CAPABILITY (command) ....................................... 18 + CAPABILITY (response) ...................................... 53 + CC <string> (search key) ................................... 39 + CHECK (command) ............................................ 36 + CLOSE (command) ............................................ 36 + COPY (command) ............................................. 46 + CREATE (command) ........................................... 25 + DELETE (command) ........................................... 26 + DELETED (search key) ....................................... 39 + DRAFT (search key) ......................................... 39 + ENVELOPE (fetch item) ...................................... 44 + ENVELOPE (fetch result) .................................... 62 + EXAMINE (command) .......................................... 24 + EXISTS (response) .......................................... 56 + EXPUNGE (command) .......................................... 37 + EXPUNGE (response) ......................................... 57 + Envelope Structure (message attribute) ..................... 11 + FAST (fetch item) .......................................... 44 + FETCH (command) ............................................ 41 + FETCH (response) ........................................... 58 + FLAGGED (search key) ....................................... 39 + FLAGS (fetch item) ......................................... 44 + FLAGS (fetch result) ....................................... 62 + FLAGS (response) ........................................... 56 + FLAGS <flag list> (store command data item) ................ 45 + FLAGS.SILENT <flag list> (store command data item) ......... 45 + FROM <string> (search key) ................................. 39 + FULL (fetch item) .......................................... 
44 + Flags (message attribute) .................................. 9 + HEADER (part specifier) .................................... 41 + HEADER (search key) .................. 39 + HEADER.FIELDS (part specifier) ............... 41 + HEADER.FIELDS.NOT (part specifier) ........... 41 + INTERNALDATE (fetch item) .................................. 44 + INTERNALDATE (fetch result) ................................ 62 + Internal Date (message attribute) .......................... 10 + KEYWORD (search key) ................................ 39 + Keyword (type of flag) ..................................... 10 + + + +Crispin Standards Track [Page 80] + +RFC 2060 IMAP4rev1 December 1996 + + + LARGER (search key) .................................... 39 + LIST (command) ............................................. 30 + LIST (response) ............................................ 54 + LOGIN (command) ............................................ 22 + LOGOUT (command) ........................................... 20 + LSUB (command) ............................................. 32 + LSUB (response) ............................................ 55 + MAY (specification requirement term) ....................... 5 + MESSAGES (status item) ..................................... 33 + MIME (part specifier) ...................................... 42 + MUST (specification requirement term) ...................... 4 + MUST NOT (specification requirement term) .................. 4 + Message Sequence Number (message attribute) ................ 9 + NEW (search key) ........................................... 39 + NEWNAME (response code) .................................... 50 + NO (response) .............................................. 51 + NOOP (command) ............................................. 19 + NOT (search key) .............................. 39 + OK (response) .............................................. 51 + OLD (search key) ........................................... 
39 + ON (search key) ..................................... 39 + OPTIONAL (specification requirement term) .................. 5 + OR (search key) ................ 39 + PARSE (response code) ...................................... 50 + PERMANENTFLAGS (response code) ............................. 50 + PREAUTH (response) ......................................... 52 + Permanent Flag (class of flag) ............................. 10 + READ-ONLY (response code) .................................. 50 + READ-WRITE (response code) ................................. 50 + RECENT (response) .......................................... 57 + RECENT (search key) ........................................ 39 + RECENT (status item) ....................................... 33 + RENAME (command) ........................................... 27 + REQUIRED (specification requirement term) .................. 4 + RFC822 (fetch item) ........................................ 44 + RFC822 (fetch result) ...................................... 63 + RFC822.HEADER (fetch item) ................................. 44 + RFC822.HEADER (fetch result) ............................... 62 + RFC822.SIZE (fetch item) ................................... 44 + RFC822.SIZE (fetch result) ................................. 62 + RFC822.TEXT (fetch item) ................................... 44 + RFC822.TEXT (fetch result) ................................. 62 + SEARCH (command) ........................................... 37 + SEARCH (response) .......................................... 55 + SEEN (search key) .......................................... 40 + SELECT (command) ........................................... 23 + SENTBEFORE (search key) ............................. 40 + SENTON (search key) ................................. 40 + + + +Crispin Standards Track [Page 81] + +RFC 2060 IMAP4rev1 December 1996 + + + SENTSINCE (search key) .............................. 
40 + SHOULD (specification requirement term) .................... 5 + SHOULD NOT (specification requirement term) ................ 5 + SINCE (search key) .................................. 40 + SMALLER (search key) ................................... 40 + STATUS (command) ........................................... 33 + STATUS (response) .......................................... 55 + STORE (command) ............................................ 45 + SUBJECT (search key) .............................. 40 + SUBSCRIBE (command) ........................................ 29 + Session Flag (class of flag) ............................... 10 + System Flag (type of flag) ................................. 9 + TEXT (part specifier) ...................................... 42 + TEXT (search key) ................................. 40 + TO (search key) ................................... 40 + TRYCREATE (response code) .................................. 51 + UID (command) .............................................. 47 + UID (fetch item) ........................................... 44 + UID (fetch result) ......................................... 63 + UID (search key) ............................. 40 + UIDNEXT (status item) ...................................... 33 + UIDVALIDITY (response code) ................................ 51 + UIDVALIDITY (status item) .................................. 34 + UNANSWERED (search key) .................................... 40 + UNDELETED (search key) ..................................... 40 + UNDRAFT (search key) ....................................... 40 + UNFLAGGED (search key) ..................................... 40 + UNKEYWORD (search key) .............................. 40 + UNSEEN (response code) ..................................... 51 + UNSEEN (search key) ........................................ 40 + UNSEEN (status item) ....................................... 34 + UNSUBSCRIBE (command) ...................................... 
30 + Unique Identifier (UID) (message attribute) ................ 7 + X (command) .......................................... 48 + [RFC-822] Size (message attribute) ......................... 11 + \Answered (system flag) .................................... 9 + \Deleted (system flag) ..................................... 9 + \Draft (system flag) ....................................... 9 + \Flagged (system flag) ..................................... 9 + \Marked (mailbox name attribute) ........................... 54 + \Noinferiors (mailbox name attribute) ...................... 54 + \Noselect (mailbox name attribute) ......................... 54 + \Recent (system flag) ...................................... 10 + \Seen (system flag) ........................................ 9 + \Unmarked (mailbox name attribute) ......................... 54 + + + + + + +Crispin Standards Track [Page 82] + diff --git a/misc/rfc2616-http.txt b/misc/rfc2616-http.txt new file mode 100644 index 0000000..45d7d08 --- /dev/null +++ b/misc/rfc2616-http.txt @@ -0,0 +1,9859 @@ + + + + + + +Network Working Group R. Fielding +Request for Comments: 2616 UC Irvine +Obsoletes: 2068 J. Gettys +Category: Standards Track Compaq/W3C + J. Mogul + Compaq + H. Frystyk + W3C/MIT + L. Masinter + Xerox + P. Leach + Microsoft + T. Berners-Lee + W3C/MIT + June 1999 + + + Hypertext Transfer Protocol -- HTTP/1.1 + +Status of this Memo + + This document specifies an Internet standards track protocol for the + Internet community, and requests discussion and suggestions for + improvements. Please refer to the current edition of the "Internet + Official Protocol Standards" (STD 1) for the standardization state + and status of this protocol. Distribution of this memo is unlimited. + +Copyright Notice + + Copyright (C) The Internet Society (1999). All Rights Reserved. + +Abstract + + The Hypertext Transfer Protocol (HTTP) is an application-level + protocol for distributed, collaborative, hypermedia information + systems. 
It is a generic, stateless, protocol which can be used for + many tasks beyond its use for hypertext, such as name servers and + distributed object management systems, through extension of its + request methods, error codes and headers [47]. A feature of HTTP is + the typing and negotiation of data representation, allowing systems + to be built independently of the data being transferred. + + HTTP has been in use by the World-Wide Web global information + initiative since 1990. This specification defines the protocol + referred to as "HTTP/1.1", and is an update to RFC 2068 [33]. + + + + + + +Fielding, et al. Standards Track [Page 1] + +RFC 2616 HTTP/1.1 June 1999 + + +Table of Contents + + 1 Introduction ...................................................7 + 1.1 Purpose......................................................7 + 1.2 Requirements .................................................8 + 1.3 Terminology ..................................................8 + 1.4 Overall Operation ...........................................12 + 2 Notational Conventions and Generic Grammar ....................14 + 2.1 Augmented BNF ...............................................14 + 2.2 Basic Rules .................................................15 + 3 Protocol Parameters ...........................................17 + 3.1 HTTP Version ................................................17 + 3.2 Uniform Resource Identifiers ................................18 + 3.2.1 General Syntax ...........................................19 + 3.2.2 http URL .................................................19 + 3.2.3 URI Comparison ...........................................20 + 3.3 Date/Time Formats ...........................................20 + 3.3.1 Full Date ................................................20 + 3.3.2 Delta Seconds ............................................21 + 3.4 Character Sets ..............................................21 + 3.4.1 Missing Charset 
..........................................22 + 3.5 Content Codings .............................................23 + 3.6 Transfer Codings ............................................24 + 3.6.1 Chunked Transfer Coding ..................................25 + 3.7 Media Types .................................................26 + 3.7.1 Canonicalization and Text Defaults .......................27 + 3.7.2 Multipart Types ..........................................27 + 3.8 Product Tokens ..............................................28 + 3.9 Quality Values ..............................................29 + 3.10 Language Tags ...............................................29 + 3.11 Entity Tags .................................................30 + 3.12 Range Units .................................................30 + 4 HTTP Message ..................................................31 + 4.1 Message Types ...............................................31 + 4.2 Message Headers .............................................31 + 4.3 Message Body ................................................32 + 4.4 Message Length ..............................................33 + 4.5 General Header Fields .......................................34 + 5 Request .......................................................35 + 5.1 Request-Line ................................................35 + 5.1.1 Method ...................................................36 + 5.1.2 Request-URI ..............................................36 + 5.2 The Resource Identified by a Request ........................38 + 5.3 Request Header Fields .......................................38 + 6 Response ......................................................39 + 6.1 Status-Line .................................................39 + 6.1.1 Status Code and Reason Phrase ............................39 + 6.2 Response Header Fields ......................................41 + + + +Fielding, et al. 
Standards Track [Page 2] + +RFC 2616 HTTP/1.1 June 1999 + + + 7 Entity ........................................................42 + 7.1 Entity Header Fields ........................................42 + 7.2 Entity Body .................................................43 + 7.2.1 Type .....................................................43 + 7.2.2 Entity Length ............................................43 + 8 Connections ...................................................44 + 8.1 Persistent Connections ......................................44 + 8.1.1 Purpose ..................................................44 + 8.1.2 Overall Operation ........................................45 + 8.1.3 Proxy Servers ............................................46 + 8.1.4 Practical Considerations .................................46 + 8.2 Message Transmission Requirements ...........................47 + 8.2.1 Persistent Connections and Flow Control ..................47 + 8.2.2 Monitoring Connections for Error Status Messages .........48 + 8.2.3 Use of the 100 (Continue) Status .........................48 + 8.2.4 Client Behavior if Server Prematurely Closes Connection ..50 + 9 Method Definitions ............................................51 + 9.1 Safe and Idempotent Methods .................................51 + 9.1.1 Safe Methods .............................................51 + 9.1.2 Idempotent Methods .......................................51 + 9.2 OPTIONS .....................................................52 + 9.3 GET .........................................................53 + 9.4 HEAD ........................................................54 + 9.5 POST ........................................................54 + 9.6 PUT .........................................................55 + 9.7 DELETE ......................................................56 + 9.8 TRACE .......................................................56 + 9.9 CONNECT 
.....................................................57 + 10 Status Code Definitions ......................................57 + 10.1 Informational 1xx ...........................................57 + 10.1.1 100 Continue .............................................58 + 10.1.2 101 Switching Protocols ..................................58 + 10.2 Successful 2xx ..............................................58 + 10.2.1 200 OK ...................................................58 + 10.2.2 201 Created ..............................................59 + 10.2.3 202 Accepted .............................................59 + 10.2.4 203 Non-Authoritative Information ........................59 + 10.2.5 204 No Content ...........................................60 + 10.2.6 205 Reset Content ........................................60 + 10.2.7 206 Partial Content ......................................60 + 10.3 Redirection 3xx .............................................61 + 10.3.1 300 Multiple Choices .....................................61 + 10.3.2 301 Moved Permanently ....................................62 + 10.3.3 302 Found ................................................62 + 10.3.4 303 See Other ............................................63 + 10.3.5 304 Not Modified .........................................63 + 10.3.6 305 Use Proxy ............................................64 + 10.3.7 306 (Unused) .............................................64 + + + +Fielding, et al. 
Standards Track [Page 3] + +RFC 2616 HTTP/1.1 June 1999 + + + 10.3.8 307 Temporary Redirect ...................................65 + 10.4 Client Error 4xx ............................................65 + 10.4.1 400 Bad Request .........................................65 + 10.4.2 401 Unauthorized ........................................66 + 10.4.3 402 Payment Required ....................................66 + 10.4.4 403 Forbidden ...........................................66 + 10.4.5 404 Not Found ...........................................66 + 10.4.6 405 Method Not Allowed ..................................66 + 10.4.7 406 Not Acceptable ......................................67 + 10.4.8 407 Proxy Authentication Required .......................67 + 10.4.9 408 Request Timeout .....................................67 + 10.4.10 409 Conflict ............................................67 + 10.4.11 410 Gone ................................................68 + 10.4.12 411 Length Required .....................................68 + 10.4.13 412 Precondition Failed .................................68 + 10.4.14 413 Request Entity Too Large ............................69 + 10.4.15 414 Request-URI Too Long ................................69 + 10.4.16 415 Unsupported Media Type ..............................69 + 10.4.17 416 Requested Range Not Satisfiable .....................69 + 10.4.18 417 Expectation Failed ..................................70 + 10.5 Server Error 5xx ............................................70 + 10.5.1 500 Internal Server Error ................................70 + 10.5.2 501 Not Implemented ......................................70 + 10.5.3 502 Bad Gateway ..........................................70 + 10.5.4 503 Service Unavailable ..................................70 + 10.5.5 504 Gateway Timeout ......................................71 + 10.5.6 505 HTTP Version Not Supported ...........................71 + 11 Access Authentication 
........................................71 + 12 Content Negotiation ..........................................71 + 12.1 Server-driven Negotiation ...................................72 + 12.2 Agent-driven Negotiation ....................................73 + 12.3 Transparent Negotiation .....................................74 + 13 Caching in HTTP ..............................................74 + 13.1.1 Cache Correctness ........................................75 + 13.1.2 Warnings .................................................76 + 13.1.3 Cache-control Mechanisms .................................77 + 13.1.4 Explicit User Agent Warnings .............................78 + 13.1.5 Exceptions to the Rules and Warnings .....................78 + 13.1.6 Client-controlled Behavior ...............................79 + 13.2 Expiration Model ............................................79 + 13.2.1 Server-Specified Expiration ..............................79 + 13.2.2 Heuristic Expiration .....................................80 + 13.2.3 Age Calculations .........................................80 + 13.2.4 Expiration Calculations ..................................83 + 13.2.5 Disambiguating Expiration Values .........................84 + 13.2.6 Disambiguating Multiple Responses ........................84 + 13.3 Validation Model ............................................85 + 13.3.1 Last-Modified Dates ......................................86 + + + +Fielding, et al. 
Standards Track [Page 4] + +RFC 2616 HTTP/1.1 June 1999 + + + 13.3.2 Entity Tag Cache Validators ..............................86 + 13.3.3 Weak and Strong Validators ...............................86 + 13.3.4 Rules for When to Use Entity Tags and Last-Modified Dates.89 + 13.3.5 Non-validating Conditionals ..............................90 + 13.4 Response Cacheability .......................................91 + 13.5 Constructing Responses From Caches ..........................92 + 13.5.1 End-to-end and Hop-by-hop Headers ........................92 + 13.5.2 Non-modifiable Headers ...................................92 + 13.5.3 Combining Headers ........................................94 + 13.5.4 Combining Byte Ranges ....................................95 + 13.6 Caching Negotiated Responses ................................95 + 13.7 Shared and Non-Shared Caches ................................96 + 13.8 Errors or Incomplete Response Cache Behavior ................97 + 13.9 Side Effects of GET and HEAD ................................97 + 13.10 Invalidation After Updates or Deletions ...................97 + 13.11 Write-Through Mandatory ...................................98 + 13.12 Cache Replacement .........................................99 + 13.13 History Lists .............................................99 + 14 Header Field Definitions ....................................100 + 14.1 Accept .....................................................100 + 14.2 Accept-Charset .............................................102 + 14.3 Accept-Encoding ............................................102 + 14.4 Accept-Language ............................................104 + 14.5 Accept-Ranges ..............................................105 + 14.6 Age ........................................................106 + 14.7 Allow ......................................................106 + 14.8 Authorization ..............................................107 + 14.9 Cache-Control 
..............................................108 + 14.9.1 What is Cacheable .......................................109 + 14.9.2 What May be Stored by Caches ............................110 + 14.9.3 Modifications of the Basic Expiration Mechanism .........111 + 14.9.4 Cache Revalidation and Reload Controls ..................113 + 14.9.5 No-Transform Directive ..................................115 + 14.9.6 Cache Control Extensions ................................116 + 14.10 Connection ...............................................117 + 14.11 Content-Encoding .........................................118 + 14.12 Content-Language .........................................118 + 14.13 Content-Length ...........................................119 + 14.14 Content-Location .........................................120 + 14.15 Content-MD5 ..............................................121 + 14.16 Content-Range ............................................122 + 14.17 Content-Type .............................................124 + 14.18 Date .....................................................124 + 14.18.1 Clockless Origin Server Operation ......................125 + 14.19 ETag .....................................................126 + 14.20 Expect ...................................................126 + 14.21 Expires ..................................................127 + 14.22 From .....................................................128 + + + +Fielding, et al. 
Standards Track [Page 5] + +RFC 2616 HTTP/1.1 June 1999 + + + 14.23 Host .....................................................128 + 14.24 If-Match .................................................129 + 14.25 If-Modified-Since ........................................130 + 14.26 If-None-Match ............................................132 + 14.27 If-Range .................................................133 + 14.28 If-Unmodified-Since ......................................134 + 14.29 Last-Modified ............................................134 + 14.30 Location .................................................135 + 14.31 Max-Forwards .............................................136 + 14.32 Pragma ...................................................136 + 14.33 Proxy-Authenticate .......................................137 + 14.34 Proxy-Authorization ......................................137 + 14.35 Range ....................................................138 + 14.35.1 Byte Ranges ...........................................138 + 14.35.2 Range Retrieval Requests ..............................139 + 14.36 Referer ..................................................140 + 14.37 Retry-After ..............................................141 + 14.38 Server ...................................................141 + 14.39 TE .......................................................142 + 14.40 Trailer ..................................................143 + 14.41 Transfer-Encoding..........................................143 + 14.42 Upgrade ..................................................144 + 14.43 User-Agent ...............................................145 + 14.44 Vary .....................................................145 + 14.45 Via ......................................................146 + 14.46 Warning ..................................................148 + 14.47 WWW-Authenticate .........................................150 + 15 Security Considerations 
.......................................150 + 15.1 Personal Information....................................151 + 15.1.1 Abuse of Server Log Information .........................151 + 15.1.2 Transfer of Sensitive Information .......................151 + 15.1.3 Encoding Sensitive Information in URI's .................152 + 15.1.4 Privacy Issues Connected to Accept Headers ..............152 + 15.2 Attacks Based On File and Path Names .......................153 + 15.3 DNS Spoofing ...............................................154 + 15.4 Location Headers and Spoofing ..............................154 + 15.5 Content-Disposition Issues .................................154 + 15.6 Authentication Credentials and Idle Clients ................155 + 15.7 Proxies and Caching ........................................155 + 15.7.1 Denial of Service Attacks on Proxies....................156 + 16 Acknowledgments .............................................156 + 17 References ..................................................158 + 18 Authors' Addresses ..........................................162 + 19 Appendices ..................................................164 + 19.1 Internet Media Type message/http and application/http ......164 + 19.2 Internet Media Type multipart/byteranges ...................165 + 19.3 Tolerant Applications ......................................166 + 19.4 Differences Between HTTP Entities and RFC 2045 Entities ....167 + + + +Fielding, et al. 
Standards Track [Page 6] + +RFC 2616 HTTP/1.1 June 1999 + + + 19.4.1 MIME-Version ............................................167 + 19.4.2 Conversion to Canonical Form ............................167 + 19.4.3 Conversion of Date Formats ..............................168 + 19.4.4 Introduction of Content-Encoding ........................168 + 19.4.5 No Content-Transfer-Encoding ............................168 + 19.4.6 Introduction of Transfer-Encoding .......................169 + 19.4.7 MHTML and Line Length Limitations .......................169 + 19.5 Additional Features ........................................169 + 19.5.1 Content-Disposition .....................................170 + 19.6 Compatibility with Previous Versions .......................170 + 19.6.1 Changes from HTTP/1.0 ...................................171 + 19.6.2 Compatibility with HTTP/1.0 Persistent Connections ......172 + 19.6.3 Changes from RFC 2068 ...................................172 + 20 Index .......................................................175 + 21 Full Copyright Statement ....................................176 + +1 Introduction + +1.1 Purpose + + The Hypertext Transfer Protocol (HTTP) is an application-level + protocol for distributed, collaborative, hypermedia information + systems. HTTP has been in use by the World-Wide Web global + information initiative since 1990. The first version of HTTP, + referred to as HTTP/0.9, was a simple protocol for raw data transfer + across the Internet. HTTP/1.0, as defined by RFC 1945 [6], improved + the protocol by allowing messages to be in the format of MIME-like + messages, containing metainformation about the data transferred and + modifiers on the request/response semantics. However, HTTP/1.0 does + not sufficiently take into consideration the effects of hierarchical + proxies, caching, the need for persistent connections, or virtual + hosts. 
In addition, the proliferation of incompletely-implemented + applications calling themselves "HTTP/1.0" has necessitated a + protocol version change in order for two communicating applications + to determine each other's true capabilities. + + This specification defines the protocol referred to as "HTTP/1.1". + This protocol includes more stringent requirements than HTTP/1.0 in + order to ensure reliable implementation of its features. + + Practical information systems require more functionality than simple + retrieval, including search, front-end update, and annotation. HTTP + allows an open-ended set of methods and headers that indicate the + purpose of a request [47]. It builds on the discipline of reference + provided by the Uniform Resource Identifier (URI) [3], as a location + (URL) [4] or name (URN) [20], for indicating the resource to which a + + + + + +Fielding, et al. Standards Track [Page 7] + +RFC 2616 HTTP/1.1 June 1999 + + + method is to be applied. Messages are passed in a format similar to + that used by Internet mail [9] as defined by the Multipurpose + Internet Mail Extensions (MIME) [7]. + + HTTP is also used as a generic protocol for communication between + user agents and proxies/gateways to other Internet systems, including + those supported by the SMTP [16], NNTP [13], FTP [18], Gopher [2], + and WAIS [10] protocols. In this way, HTTP allows basic hypermedia + access to resources available from diverse applications. + +1.2 Requirements + + The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", + "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in RFC 2119 [34]. + + An implementation is not compliant if it fails to satisfy one or more + of the MUST or REQUIRED level requirements for the protocols it + implements. 
An implementation that satisfies all the MUST or REQUIRED + level and all the SHOULD level requirements for its protocols is said + to be "unconditionally compliant"; one that satisfies all the MUST + level requirements but not all the SHOULD level requirements for its + protocols is said to be "conditionally compliant." + +1.3 Terminology + + This specification uses a number of terms to refer to the roles + played by participants in, and objects of, the HTTP communication. + + connection + A transport layer virtual circuit established between two programs + for the purpose of communication. + + message + The basic unit of HTTP communication, consisting of a structured + sequence of octets matching the syntax defined in section 4 and + transmitted via the connection. + + request + An HTTP request message, as defined in section 5. + + response + An HTTP response message, as defined in section 6. + + + + + + + + +Fielding, et al. Standards Track [Page 8] + +RFC 2616 HTTP/1.1 June 1999 + + + resource + A network data object or service that can be identified by a URI, + as defined in section 3.2. Resources may be available in multiple + representations (e.g. multiple languages, data formats, size, and + resolutions) or vary in other ways. + + entity + The information transferred as the payload of a request or + response. An entity consists of metainformation in the form of + entity-header fields and content in the form of an entity-body, as + described in section 7. + + representation + An entity included with a response that is subject to content + negotiation, as described in section 12. There may exist multiple + representations associated with a particular response status. + + content negotiation + The mechanism for selecting the appropriate representation when + servicing a request, as described in section 12. The + representation of entities in any response can be negotiated + (including error responses). 
+ + variant + A resource may have one, or more than one, representation(s) + associated with it at any given instant. Each of these + representations is termed a `variant'. Use of the term `variant' + does not necessarily imply that the resource is subject to content + negotiation. + + client + A program that establishes connections for the purpose of sending + requests. + + user agent + The client which initiates a request. These are often browsers, + editors, spiders (web-traversing robots), or other end user tools. + + server + An application program that accepts connections in order to + service requests by sending back responses. Any given program may + be capable of being both a client and a server; our use of these + terms refers only to the role being performed by the program for a + particular connection, rather than to the program's capabilities + in general. Likewise, any server may act as an origin server, + proxy, gateway, or tunnel, switching behavior based on the nature + of each request. + + + + +Fielding, et al. Standards Track [Page 9] + +RFC 2616 HTTP/1.1 June 1999 + + + origin server + The server on which a given resource resides or is to be created. + + proxy + An intermediary program which acts as both a server and a client + for the purpose of making requests on behalf of other clients. + Requests are serviced internally or by passing them on, with + possible translation, to other servers. A proxy MUST implement + both the client and server requirements of this specification. A + "transparent proxy" is a proxy that does not modify the request or + response beyond what is required for proxy authentication and + identification. A "non-transparent proxy" is a proxy that modifies + the request or response in order to provide some added service to + the user agent, such as group annotation services, media type + transformation, protocol reduction, or anonymity filtering. 
Except + where either transparent or non-transparent behavior is explicitly + stated, the HTTP proxy requirements apply to both types of + proxies. + + gateway + A server which acts as an intermediary for some other server. + Unlike a proxy, a gateway receives requests as if it were the + origin server for the requested resource; the requesting client + may not be aware that it is communicating with a gateway. + + tunnel + An intermediary program which is acting as a blind relay between + two connections. Once active, a tunnel is not considered a party + to the HTTP communication, though the tunnel may have been + initiated by an HTTP request. The tunnel ceases to exist when both + ends of the relayed connections are closed. + + cache + A program's local store of response messages and the subsystem + that controls its message storage, retrieval, and deletion. A + cache stores cacheable responses in order to reduce the response + time and network bandwidth consumption on future, equivalent + requests. Any client or server may include a cache, though a cache + cannot be used by a server that is acting as a tunnel. + + cacheable + A response is cacheable if a cache is allowed to store a copy of + the response message for use in answering subsequent requests. The + rules for determining the cacheability of HTTP responses are + defined in section 13. Even if a resource is cacheable, there may + be additional constraints on whether a cache can use the cached + copy for a particular request. + + + + +Fielding, et al. Standards Track [Page 10] + +RFC 2616 HTTP/1.1 June 1999 + + + first-hand + A response is first-hand if it comes directly and without + unnecessary delay from the origin server, perhaps via one or more + proxies. A response is also first-hand if its validity has just + been checked directly with the origin server. 
+ + explicit expiration time + The time at which the origin server intends that an entity should + no longer be returned by a cache without further validation. + + heuristic expiration time + An expiration time assigned by a cache when no explicit expiration + time is available. + + age + The age of a response is the time since it was sent by, or + successfully validated with, the origin server. + + freshness lifetime + The length of time between the generation of a response and its + expiration time. + + fresh + A response is fresh if its age has not yet exceeded its freshness + lifetime. + + stale + A response is stale if its age has passed its freshness lifetime. + + semantically transparent + A cache behaves in a "semantically transparent" manner, with + respect to a particular response, when its use affects neither the + requesting client nor the origin server, except to improve + performance. When a cache is semantically transparent, the client + receives exactly the same response (except for hop-by-hop headers) + that it would have received had its request been handled directly + by the origin server. + + validator + A protocol element (e.g., an entity tag or a Last-Modified time) + that is used to find out whether a cache entry is an equivalent + copy of an entity. + + upstream/downstream + Upstream and downstream describe the flow of a message: all + messages flow from upstream to downstream. + + + + + +Fielding, et al. Standards Track [Page 11] + +RFC 2616 HTTP/1.1 June 1999 + + + inbound/outbound + Inbound and outbound refer to the request and response paths for + messages: "inbound" means "traveling toward the origin server", + and "outbound" means "traveling toward the user agent" + +1.4 Overall Operation + + The HTTP protocol is a request/response protocol. 
A client sends a + request to the server in the form of a request method, URI, and + protocol version, followed by a MIME-like message containing request + modifiers, client information, and possible body content over a + connection with a server. The server responds with a status line, + including the message's protocol version and a success or error code, + followed by a MIME-like message containing server information, entity + metainformation, and possible entity-body content. The relationship + between HTTP and MIME is described in appendix 19.4. + + Most HTTP communication is initiated by a user agent and consists of + a request to be applied to a resource on some origin server. In the + simplest case, this may be accomplished via a single connection (v) + between the user agent (UA) and the origin server (O). + + request chain ------------------------> + UA -------------------v------------------- O + <----------------------- response chain + + A more complicated situation occurs when one or more intermediaries + are present in the request/response chain. There are three common + forms of intermediary: proxy, gateway, and tunnel. A proxy is a + forwarding agent, receiving requests for a URI in its absolute form, + rewriting all or part of the message, and forwarding the reformatted + request toward the server identified by the URI. A gateway is a + receiving agent, acting as a layer above some other server(s) and, if + necessary, translating the requests to the underlying server's + protocol. A tunnel acts as a relay point between two connections + without changing the messages; tunnels are used when the + communication needs to pass through an intermediary (such as a + firewall) even when the intermediary cannot understand the contents + of the messages. 
+ + request chain --------------------------------------> + UA -----v----- A -----v----- B -----v----- C -----v----- O + <------------------------------------- response chain + + The figure above shows three intermediaries (A, B, and C) between the + user agent and origin server. A request or response message that + travels the whole chain will pass through four separate connections. + This distinction is important because some HTTP communication options + + + +Fielding, et al. Standards Track [Page 12] + +RFC 2616 HTTP/1.1 June 1999 + + + may apply only to the connection with the nearest, non-tunnel + neighbor, only to the end-points of the chain, or to all connections + along the chain. Although the diagram is linear, each participant may + be engaged in multiple, simultaneous communications. For example, B + may be receiving requests from many clients other than A, and/or + forwarding requests to servers other than C, at the same time that it + is handling A's request. + + Any party to the communication which is not acting as a tunnel may + employ an internal cache for handling requests. The effect of a cache + is that the request/response chain is shortened if one of the + participants along the chain has a cached response applicable to that + request. The following illustrates the resulting chain if B has a + cached copy of an earlier response from O (via C) for a request which + has not been cached by UA or A. + + request chain ----------> + UA -----v----- A -----v----- B - - - - - - C - - - - - - O + <--------- response chain + + Not all responses are usefully cacheable, and some requests may + contain modifiers which place special requirements on cache behavior. + HTTP requirements for cache behavior and cacheable responses are + defined in section 13. + + In fact, there are a wide variety of architectures and configurations + of caches and proxies currently being experimented with or deployed + across the World Wide Web. 
These systems include national hierarchies + of proxy caches to save transoceanic bandwidth, systems that + broadcast or multicast cache entries, organizations that distribute + subsets of cached data via CD-ROM, and so on. HTTP systems are used + in corporate intranets over high-bandwidth links, and for access via + PDAs with low-power radio links and intermittent connectivity. The + goal of HTTP/1.1 is to support the wide diversity of configurations + already deployed while introducing protocol constructs that meet the + needs of those who build web applications that require high + reliability and, failing that, at least reliable indications of + failure. + + HTTP communication usually takes place over TCP/IP connections. The + default port is TCP 80 [19], but other ports can be used. This does + not preclude HTTP from being implemented on top of any other protocol + on the Internet, or on other networks. HTTP only presumes a reliable + transport; any protocol that provides such guarantees can be used; + the mapping of the HTTP/1.1 request and response structures onto the + transport data units of the protocol in question is outside the scope + of this specification. + + + + +Fielding, et al. Standards Track [Page 13] + +RFC 2616 HTTP/1.1 June 1999 + + + In HTTP/1.0, most implementations used a new connection for each + request/response exchange. In HTTP/1.1, a connection may be used for + one or more request/response exchanges, although connections may be + closed for a variety of reasons (see section 8.1). + +2 Notational Conventions and Generic Grammar + +2.1 Augmented BNF + + All of the mechanisms specified in this document are described in + both prose and an augmented Backus-Naur Form (BNF) similar to that + used by RFC 822 [9]. Implementors will need to be familiar with the + notation in order to understand this specification. 
The augmented BNF + includes the following constructs: + + name = definition + The name of a rule is simply the name itself (without any + enclosing "<" and ">") and is separated from its definition by the + equal "=" character. White space is only significant in that + indentation of continuation lines is used to indicate a rule + definition that spans more than one line. Certain basic rules are + in uppercase, such as SP, LWS, HT, CRLF, DIGIT, ALPHA, etc. Angle + brackets are used within definitions whenever their presence will + facilitate discerning the use of rule names. + + "literal" + Quotation marks surround literal text. Unless stated otherwise, + the text is case-insensitive. + + rule1 | rule2 + Elements separated by a bar ("|") are alternatives, e.g., "yes | + no" will accept yes or no. + + (rule1 rule2) + Elements enclosed in parentheses are treated as a single element. + Thus, "(elem (foo | bar) elem)" allows the token sequences "elem + foo elem" and "elem bar elem". + + *rule + The character "*" preceding an element indicates repetition. The + full form is "<n>*<m>element" indicating at least <n> and at most <m> + occurrences of element. Default values are 0 and infinity so + that "*(element)" allows any number, including zero; "1*element" + requires at least one; and "1*2element" allows one or two. + + [rule] + Square brackets enclose optional elements; "[foo bar]" is + equivalent to "*1(foo bar)". + + + +Fielding, et al. Standards Track [Page 14] + +RFC 2616 HTTP/1.1 June 1999 + + + <n>rule + Specific repetition: "<n>(element)" is equivalent to + "<n>*<n>(element)"; that is, exactly <n> occurrences of (element). + Thus 2DIGIT is a 2-digit number, and 3ALPHA is a string of three + alphabetic characters. + + #rule + A construct "#" is defined, similar to "*", for defining lists of + elements. The full form is "<n>#<m>element" indicating at least + <n> and at most <m> elements, each separated by one or more commas + (",") and OPTIONAL linear white space (LWS).
This makes the usual + form of lists very easy; a rule such as + ( *LWS element *( *LWS "," *LWS element )) + can be shown as + 1#element + Wherever this construct is used, null elements are allowed, but do + not contribute to the count of elements present. That is, + "(element), , (element) " is permitted, but counts as only two + elements. Therefore, where at least one element is required, at + least one non-null element MUST be present. Default values are 0 + and infinity so that "#element" allows any number, including zero; + "1#element" requires at least one; and "1#2element" allows one or + two. + + ; comment + A semi-colon, set off some distance to the right of rule text, + starts a comment that continues to the end of line. This is a + simple way of including useful notes in parallel with the + specifications. + + implied *LWS + The grammar described by this specification is word-based. Except + where noted otherwise, linear white space (LWS) can be included + between any two adjacent words (token or quoted-string), and + between adjacent words and separators, without changing the + interpretation of a field. At least one delimiter (LWS and/or + + separators) MUST exist between any two tokens (for the definition + of "token" below), since they would otherwise be interpreted as a + single token. + +2.2 Basic Rules + + The following rules are used throughout this specification to + describe basic parsing constructs. The US-ASCII coded character set + is defined by ANSI X3.4-1986 [21]. + + + + + +Fielding, et al. Standards Track [Page 15] + +RFC 2616 HTTP/1.1 June 1999 + + + OCTET = <any 8-bit sequence of data> + CHAR = <any US-ASCII character (octets 0 - 127)> + UPALPHA = <any US-ASCII uppercase letter "A".."Z"> + LOALPHA = <any US-ASCII lowercase letter "a".."z"> + ALPHA = UPALPHA | LOALPHA + DIGIT = <any US-ASCII digit "0".."9"> + CTL = <any US-ASCII control character + (octets 0 - 31) and DEL (127)> + CR = <US-ASCII CR, carriage return (13)> + LF = <US-ASCII LF, linefeed (10)> + SP = <US-ASCII SP, space (32)> + HT = <US-ASCII HT, horizontal-tab (9)> + <"> = <US-ASCII double-quote mark (34)> + + HTTP/1.1 defines the sequence CR LF as the end-of-line marker for all + protocol elements except the entity-body (see appendix 19.3 for + tolerant applications).
The end-of-line marker within an entity-body + is defined by its associated media type, as described in section 3.7. + + CRLF = CR LF + + HTTP/1.1 header field values can be folded onto multiple lines if the + continuation line begins with a space or horizontal tab. All linear + white space, including folding, has the same semantics as SP. A + recipient MAY replace any linear white space with a single SP before + interpreting the field value or forwarding the message downstream. + + LWS = [CRLF] 1*( SP | HT ) + + The TEXT rule is only used for descriptive field contents and values + that are not intended to be interpreted by the message parser. Words + of *TEXT MAY contain characters from character sets other than ISO- + 8859-1 [22] only when encoded according to the rules of RFC 2047 + [14]. + + TEXT = <any OCTET except CTLs, + but including LWS> + + A CRLF is allowed in the definition of TEXT only as part of a header + field continuation. It is expected that the folding LWS will be + replaced with a single SP before interpretation of the TEXT value. + + Hexadecimal numeric characters are used in several protocol elements. + + HEX = "A" | "B" | "C" | "D" | "E" | "F" + | "a" | "b" | "c" | "d" | "e" | "f" | DIGIT + + + + + +Fielding, et al. Standards Track [Page 16] + +RFC 2616 HTTP/1.1 June 1999 + + + Many HTTP/1.1 header field values consist of words separated by LWS + or special characters. These special characters MUST be in a quoted + string to be used within a parameter value (as defined in section + 3.6). + + token = 1*<any CHAR except CTLs or separators> + separators = "(" | ")" | "<" | ">" | "@" + | "," | ";" | ":" | "\" | <"> + | "/" | "[" | "]" | "?" | "=" + | "{" | "}" | SP | HT + + Comments can be included in some HTTP header fields by surrounding + the comment text with parentheses. Comments are only allowed in + fields containing "comment" as part of their field value definition. + In all other fields, parentheses are considered part of the field + value.
+ + comment = "(" *( ctext | quoted-pair | comment ) ")" + ctext = <any TEXT excluding "(" and ")"> + + A string of text is parsed as a single word if it is quoted using + double-quote marks. + + quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) + qdtext = <any TEXT except <">> + + The backslash character ("\") MAY be used as a single-character + quoting mechanism only within quoted-string and comment constructs. + + quoted-pair = "\" CHAR + +3 Protocol Parameters + +3.1 HTTP Version + + HTTP uses a "<major>.<minor>" numbering scheme to indicate versions + of the protocol. The protocol versioning policy is intended to allow + the sender to indicate the format of a message and its capacity for + understanding further HTTP communication, rather than the features + obtained via that communication. No change is made to the version + number for the addition of message components which do not affect + communication behavior or which only add to extensible field values. + The <minor> number is incremented when the changes made to the + protocol add features which do not change the general message parsing + algorithm, but which may add to the message semantics and imply + additional capabilities of the sender. The <major> number is + incremented when the format of a message within the protocol is + changed. See RFC 2145 [36] for a fuller explanation. + + + +Fielding, et al. Standards Track [Page 17] + +RFC 2616 HTTP/1.1 June 1999 + + + The version of an HTTP message is indicated by an HTTP-Version field + in the first line of the message. + + HTTP-Version = "HTTP" "/" 1*DIGIT "." 1*DIGIT + + Note that the major and minor numbers MUST be treated as separate + integers and that each MAY be incremented higher than a single digit. + Thus, HTTP/2.4 is a lower version than HTTP/2.13, which in turn is + lower than HTTP/12.3. Leading zeros MUST be ignored by recipients and + MUST NOT be sent.
+ + An application that sends a request or response message that includes + HTTP-Version of "HTTP/1.1" MUST be at least conditionally compliant + with this specification. Applications that are at least conditionally + compliant with this specification SHOULD use an HTTP-Version of + "HTTP/1.1" in their messages, and MUST do so for any message that is + not compatible with HTTP/1.0. For more details on when to send + specific HTTP-Version values, see RFC 2145 [36]. + + The HTTP version of an application is the highest HTTP version for + which the application is at least conditionally compliant. + + Proxy and gateway applications need to be careful when forwarding + messages in protocol versions different from that of the application. + Since the protocol version indicates the protocol capability of the + sender, a proxy/gateway MUST NOT send a message with a version + indicator which is greater than its actual version. If a higher + version request is received, the proxy/gateway MUST either downgrade + the request version, or respond with an error, or switch to tunnel + behavior. + + Due to interoperability problems with HTTP/1.0 proxies discovered + since the publication of RFC 2068[33], caching proxies MUST, gateways + MAY, and tunnels MUST NOT upgrade the request to the highest version + they support. The proxy/gateway's response to that request MUST be in + the same major version as the request. + + Note: Converting between versions of HTTP may involve modification + of header fields required or forbidden by the versions involved. + +3.2 Uniform Resource Identifiers + + URIs have been known by many names: WWW addresses, Universal Document + Identifiers, Universal Resource Identifiers [3], and finally the + combination of Uniform Resource Locators (URL) [4] and Names (URN) + [20]. As far as HTTP is concerned, Uniform Resource Identifiers are + simply formatted strings which identify--via name, location, or any + other characteristic--a resource. 
+ + + +Fielding, et al. Standards Track [Page 18] + +RFC 2616 HTTP/1.1 June 1999 + + +3.2.1 General Syntax + + URIs in HTTP can be represented in absolute form or relative to some + known base URI [11], depending upon the context of their use. The two + forms are differentiated by the fact that absolute URIs always begin + with a scheme name followed by a colon. For definitive information on + URL syntax and semantics, see "Uniform Resource Identifiers (URI): + Generic Syntax and Semantics," RFC 2396 [42] (which replaces RFCs + 1738 [4] and RFC 1808 [11]). This specification adopts the + definitions of "URI-reference", "absoluteURI", "relativeURI", "port", + "host","abs_path", "rel_path", and "authority" from that + specification. + + The HTTP protocol does not place any a priori limit on the length of + a URI. Servers MUST be able to handle the URI of any resource they + serve, and SHOULD be able to handle URIs of unbounded length if they + provide GET-based forms that could generate such URIs. A server + SHOULD return 414 (Request-URI Too Long) status if a URI is longer + than the server can handle (see section 10.4.15). + + Note: Servers ought to be cautious about depending on URI lengths + above 255 bytes, because some older client or proxy + implementations might not properly support these lengths. + +3.2.2 http URL + + The "http" scheme is used to locate network resources via the HTTP + protocol. This section defines the scheme-specific syntax and + semantics for http URLs. + + http_URL = "http:" "//" host [ ":" port ] [ abs_path [ "?" query ]] + + If the port is empty or not given, port 80 is assumed. The semantics + are that the identified resource is located at the server listening + for TCP connections on that port of that host, and the Request-URI + for the resource is abs_path (section 5.1.2). The use of IP addresses + in URLs SHOULD be avoided whenever possible (see RFC 1900 [24]). 
If + the abs_path is not present in the URL, it MUST be given as "/" when + used as a Request-URI for a resource (section 5.1.2). If a proxy + receives a host name which is not a fully qualified domain name, it + MAY add its domain to the host name it received. If a proxy receives + a fully qualified domain name, the proxy MUST NOT change the host + name. + + + + + + + + +Fielding, et al. Standards Track [Page 19] + +RFC 2616 HTTP/1.1 June 1999 + + +3.2.3 URI Comparison + + When comparing two URIs to decide if they match or not, a client + SHOULD use a case-sensitive octet-by-octet comparison of the entire + URIs, with these exceptions: + + - A port that is empty or not given is equivalent to the default + port for that URI-reference; + + - Comparisons of host names MUST be case-insensitive; + + - Comparisons of scheme names MUST be case-insensitive; + + - An empty abs_path is equivalent to an abs_path of "/". + + Characters other than those in the "reserved" and "unsafe" sets (see + RFC 2396 [42]) are equivalent to their ""%" HEX HEX" encoding. + + For example, the following three URIs are equivalent: + + http://abc.com:80/~smith/home.html + http://ABC.com/%7Esmith/home.html + http://ABC.com:/%7esmith/home.html + +3.3 Date/Time Formats + +3.3.1 Full Date + + HTTP applications have historically allowed three different formats + for the representation of date/time stamps: + + Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123 + Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036 + Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format + + The first format is preferred as an Internet standard and represents + a fixed-length subset of that defined by RFC 1123 [8] (an update to + RFC 822 [9]). The second format is in common use, but is based on the + obsolete RFC 850 [12] date format and lacks a four-digit year. 
+ HTTP/1.1 clients and servers that parse the date value MUST accept + all three formats (for compatibility with HTTP/1.0), though they MUST + only generate the RFC 1123 format for representing HTTP-date values + in header fields. See section 19.3 for further information. + + Note: Recipients of date values are encouraged to be robust in + accepting date values that may have been sent by non-HTTP + applications, as is sometimes the case when retrieving or posting + messages via proxies/gateways to SMTP or NNTP. + + + +Fielding, et al. Standards Track [Page 20] + +RFC 2616 HTTP/1.1 June 1999 + + + All HTTP date/time stamps MUST be represented in Greenwich Mean Time + (GMT), without exception. For the purposes of HTTP, GMT is exactly + equal to UTC (Coordinated Universal Time). This is indicated in the + first two formats by the inclusion of "GMT" as the three-letter + abbreviation for time zone, and MUST be assumed when reading the + asctime format. HTTP-date is case sensitive and MUST NOT include + additional LWS beyond that specifically included as SP in the + grammar. + + HTTP-date = rfc1123-date | rfc850-date | asctime-date + rfc1123-date = wkday "," SP date1 SP time SP "GMT" + rfc850-date = weekday "," SP date2 SP time SP "GMT" + asctime-date = wkday SP date3 SP time SP 4DIGIT + date1 = 2DIGIT SP month SP 4DIGIT + ; day month year (e.g., 02 Jun 1982) + date2 = 2DIGIT "-" month "-" 2DIGIT + ; day-month-year (e.g., 02-Jun-82) + date3 = month SP ( 2DIGIT | ( SP 1DIGIT )) + ; month day (e.g., Jun 2) + time = 2DIGIT ":" 2DIGIT ":" 2DIGIT + ; 00:00:00 - 23:59:59 + wkday = "Mon" | "Tue" | "Wed" + | "Thu" | "Fri" | "Sat" | "Sun" + weekday = "Monday" | "Tuesday" | "Wednesday" + | "Thursday" | "Friday" | "Saturday" | "Sunday" + month = "Jan" | "Feb" | "Mar" | "Apr" + | "May" | "Jun" | "Jul" | "Aug" + | "Sep" | "Oct" | "Nov" | "Dec" + + Note: HTTP requirements for the date/time stamp format apply only + to their usage within the protocol stream. 
Clients and servers are + not required to use these formats for user presentation, request + logging, etc. + +3.3.2 Delta Seconds + + Some HTTP header fields allow a time value to be specified as an + integer number of seconds, represented in decimal, after the time + that the message was received. + + delta-seconds = 1*DIGIT + +3.4 Character Sets + + HTTP uses the same definition of the term "character set" as that + described for MIME: + + + + + +Fielding, et al. Standards Track [Page 21] + +RFC 2616 HTTP/1.1 June 1999 + + + The term "character set" is used in this document to refer to a + method used with one or more tables to convert a sequence of octets + into a sequence of characters. Note that unconditional conversion in + the other direction is not required, in that not all characters may + be available in a given character set and a character set may provide + more than one sequence of octets to represent a particular character. + This definition is intended to allow various kinds of character + encoding, from simple single-table mappings such as US-ASCII to + complex table switching methods such as those that use ISO-2022's + techniques. However, the definition associated with a MIME character + set name MUST fully specify the mapping to be performed from octets + to characters. In particular, use of external profiling information + to determine the exact mapping is not permitted. + + Note: This use of the term "character set" is more commonly + referred to as a "character encoding." However, since HTTP and + MIME share the same registry, it is important that the terminology + also be shared. + + HTTP character sets are identified by case-insensitive tokens. The + complete set of tokens is defined by the IANA Character Set registry + [19]. 
+ + charset = token + + Although HTTP allows an arbitrary token to be used as a charset + value, any token that has a predefined value within the IANA + Character Set registry [19] MUST represent the character set defined + by that registry. Applications SHOULD limit their use of character + sets to those defined by the IANA registry. + + Implementors should be aware of IETF character set requirements [38] + [41]. + +3.4.1 Missing Charset + + Some HTTP/1.0 software has interpreted a Content-Type header without + charset parameter incorrectly to mean "recipient should guess." + Senders wishing to defeat this behavior MAY include a charset + parameter even when the charset is ISO-8859-1 and SHOULD do so when + it is known that it will not confuse the recipient. + + Unfortunately, some older HTTP/1.0 clients did not deal properly with + an explicit charset parameter. HTTP/1.1 recipients MUST respect the + charset label provided by the sender; and those user agents that have + a provision to "guess" a charset MUST use the charset from the + + + + + +Fielding, et al. Standards Track [Page 22] + +RFC 2616 HTTP/1.1 June 1999 + + + content-type field if they support that charset, rather than the + recipient's preference, when initially displaying a document. See + section 3.7.1. + +3.5 Content Codings + + Content coding values indicate an encoding transformation that has + been or can be applied to an entity. Content codings are primarily + used to allow a document to be compressed or otherwise usefully + transformed without losing the identity of its underlying media type + and without loss of information. Frequently, the entity is stored in + coded form, transmitted directly, and only decoded by the recipient. + + content-coding = token + + All content-coding values are case-insensitive. HTTP/1.1 uses + content-coding values in the Accept-Encoding (section 14.3) and + Content-Encoding (section 14.11) header fields. 
Although the value + describes the content-coding, what is more important is that it + indicates what decoding mechanism will be required to remove the + encoding. + + The Internet Assigned Numbers Authority (IANA) acts as a registry for + content-coding value tokens. Initially, the registry contains the + following tokens: + + gzip An encoding format produced by the file compression program + "gzip" (GNU zip) as described in RFC 1952 [25]. This format is a + Lempel-Ziv coding (LZ77) with a 32 bit CRC. + + compress + The encoding format produced by the common UNIX file compression + program "compress". This format is an adaptive Lempel-Ziv-Welch + coding (LZW). + + Use of program names for the identification of encoding formats + is not desirable and is discouraged for future encodings. Their + use here is representative of historical practice, not good + design. For compatibility with previous implementations of HTTP, + applications SHOULD consider "x-gzip" and "x-compress" to be + equivalent to "gzip" and "compress" respectively. + + deflate + The "zlib" format defined in RFC 1950 [31] in combination with + the "deflate" compression mechanism described in RFC 1951 [29]. + + + + + + +Fielding, et al. Standards Track [Page 23] + +RFC 2616 HTTP/1.1 June 1999 + + + identity + The default (identity) encoding; the use of no transformation + whatsoever. This content-coding is used only in the Accept- + Encoding header, and SHOULD NOT be used in the Content-Encoding + header. + + New content-coding value tokens SHOULD be registered; to allow + interoperability between clients and servers, specifications of the + content coding algorithms needed to implement a new value SHOULD be + publicly available and adequate for independent implementation, and + conform to the purpose of content coding defined in this section. 
+ +3.6 Transfer Codings + + Transfer-coding values are used to indicate an encoding + transformation that has been, can be, or may need to be applied to an + entity-body in order to ensure "safe transport" through the network. + This differs from a content coding in that the transfer-coding is a + property of the message, not of the original entity. + + transfer-coding = "chunked" | transfer-extension + transfer-extension = token *( ";" parameter ) + + Parameters are in the form of attribute/value pairs. + + parameter = attribute "=" value + attribute = token + value = token | quoted-string + + All transfer-coding values are case-insensitive. HTTP/1.1 uses + transfer-coding values in the TE header field (section 14.39) and in + the Transfer-Encoding header field (section 14.41). + + Whenever a transfer-coding is applied to a message-body, the set of + transfer-codings MUST include "chunked", unless the message is + terminated by closing the connection. When the "chunked" transfer- + coding is used, it MUST be the last transfer-coding applied to the + message-body. The "chunked" transfer-coding MUST NOT be applied more + than once to a message-body. These rules allow the recipient to + determine the transfer-length of the message (section 4.4). + + Transfer-codings are analogous to the Content-Transfer-Encoding + values of MIME [7], which were designed to enable safe transport of + binary data over a 7-bit transport service. However, safe transport + has a different focus for an 8bit-clean transfer protocol. In HTTP, + the only unsafe characteristic of message-bodies is the difficulty in + determining the exact body length (section 7.2.2), or the desire to + encrypt data over a shared transport. + + + +Fielding, et al. Standards Track [Page 24] + +RFC 2616 HTTP/1.1 June 1999 + + + The Internet Assigned Numbers Authority (IANA) acts as a registry for + transfer-coding value tokens. 
Initially, the registry contains the + following tokens: "chunked" (section 3.6.1), "identity" (section + 3.6.2), "gzip" (section 3.5), "compress" (section 3.5), and "deflate" + (section 3.5). + + New transfer-coding value tokens SHOULD be registered in the same way + as new content-coding value tokens (section 3.5). + + A server which receives an entity-body with a transfer-coding it does + not understand SHOULD return 501 (Unimplemented), and close the + connection. A server MUST NOT send transfer-codings to an HTTP/1.0 + client. + +3.6.1 Chunked Transfer Coding + + The chunked encoding modifies the body of a message in order to + transfer it as a series of chunks, each with its own size indicator, + followed by an OPTIONAL trailer containing entity-header fields. This + allows dynamically produced content to be transferred along with the + information necessary for the recipient to verify that it has + received the full message. + + Chunked-Body = *chunk + last-chunk + trailer + CRLF + + chunk = chunk-size [ chunk-extension ] CRLF + chunk-data CRLF + chunk-size = 1*HEX + last-chunk = 1*("0") [ chunk-extension ] CRLF + + chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] ) + chunk-ext-name = token + chunk-ext-val = token | quoted-string + chunk-data = chunk-size(OCTET) + trailer = *(entity-header CRLF) + + The chunk-size field is a string of hex digits indicating the size of + the chunk. The chunked encoding is ended by any chunk whose size is + zero, followed by the trailer, which is terminated by an empty line. + + The trailer allows the sender to include additional HTTP header + fields at the end of the message. The Trailer header field can be + used to indicate which header fields are included in a trailer (see + section 14.40). + + + + +Fielding, et al. 
Standards Track [Page 25] + +RFC 2616 HTTP/1.1 June 1999 + + + A server using chunked transfer-coding in a response MUST NOT use the + trailer for any header fields unless at least one of the following is + true: + + a)the request included a TE header field that indicates "trailers" is + acceptable in the transfer-coding of the response, as described in + section 14.39; or, + + b)the server is the origin server for the response, the trailer + fields consist entirely of optional metadata, and the recipient + could use the message (in a manner acceptable to the origin server) + without receiving this metadata. In other words, the origin server + is willing to accept the possibility that the trailer fields might + be silently discarded along the path to the client. + + This requirement prevents an interoperability failure when the + message is being received by an HTTP/1.1 (or later) proxy and + forwarded to an HTTP/1.0 recipient. It avoids a situation where + compliance with the protocol would have necessitated a possibly + infinite buffer on the proxy. + + An example process for decoding a Chunked-Body is presented in + appendix 19.4.6. + + All HTTP/1.1 applications MUST be able to receive and decode the + "chunked" transfer-coding, and MUST ignore chunk-extension extensions + they do not understand. + +3.7 Media Types + + HTTP uses Internet Media Types [17] in the Content-Type (section + 14.17) and Accept (section 14.1) header fields in order to provide + open and extensible data typing and type negotiation. + + media-type = type "/" subtype *( ";" parameter ) + type = token + subtype = token + + Parameters MAY follow the type/subtype in the form of attribute/value + pairs (as defined in section 3.6). + + The type, subtype, and parameter attribute names are case- + insensitive. Parameter values might or might not be case-sensitive, + depending on the semantics of the parameter name. 
Linear white space + (LWS) MUST NOT be used between the type and subtype, nor between an + attribute and its value. The presence or absence of a parameter might + be significant to the processing of a media-type, depending on its + definition within the media type registry. + + + +Fielding, et al. Standards Track [Page 26] + +RFC 2616 HTTP/1.1 June 1999 + + + Note that some older HTTP applications do not recognize media type + parameters. When sending data to older HTTP applications, + implementations SHOULD only use media type parameters when they are + required by that type/subtype definition. + + Media-type values are registered with the Internet Assigned Number + Authority (IANA [19]). The media type registration process is + outlined in RFC 1590 [17]. Use of non-registered media types is + discouraged. + +3.7.1 Canonicalization and Text Defaults + + Internet media types are registered with a canonical form. An + entity-body transferred via HTTP messages MUST be represented in the + appropriate canonical form prior to its transmission except for + "text" types, as defined in the next paragraph. + + When in canonical form, media subtypes of the "text" type use CRLF as + the text line break. HTTP relaxes this requirement and allows the + transport of text media with plain CR or LF alone representing a line + break when it is done consistently for an entire entity-body. HTTP + applications MUST accept CRLF, bare CR, and bare LF as being + representative of a line break in text media received via HTTP. In + addition, if the text is represented in a character set that does not + use octets 13 and 10 for CR and LF respectively, as is the case for + some multi-byte character sets, HTTP allows the use of whatever octet + sequences are defined by that character set to represent the + equivalent of CR and LF for line breaks. 
This flexibility regarding + line breaks applies only to text media in the entity-body; a bare CR + or LF MUST NOT be substituted for CRLF within any of the HTTP control + structures (such as header fields and multipart boundaries). + + If an entity-body is encoded with a content-coding, the underlying + data MUST be in a form defined above prior to being encoded. + + The "charset" parameter is used with some media types to define the + character set (section 3.4) of the data. When no explicit charset + parameter is provided by the sender, media subtypes of the "text" + type are defined to have a default charset value of "ISO-8859-1" when + received via HTTP. Data in character sets other than "ISO-8859-1" or + its subsets MUST be labeled with an appropriate charset value. See + section 3.4.1 for compatibility problems. + +3.7.2 Multipart Types + + MIME provides for a number of "multipart" types -- encapsulations of + one or more entities within a single message-body. All multipart + types share a common syntax, as defined in section 5.1.1 of RFC 2046 + + + +Fielding, et al. Standards Track [Page 27] + +RFC 2616 HTTP/1.1 June 1999 + + + [40], and MUST include a boundary parameter as part of the media type + value. The message body is itself a protocol element and MUST + therefore use only CRLF to represent line breaks between body-parts. + Unlike in RFC 2046, the epilogue of any multipart message MUST be + empty; HTTP applications MUST NOT transmit the epilogue (even if the + original multipart contains an epilogue). These restrictions exist in + order to preserve the self-delimiting nature of a multipart message- + body, wherein the "end" of the message-body is indicated by the + ending multipart boundary. + + In general, HTTP treats a multipart message-body no differently than + any other media type: strictly as payload. 
The one exception is the + "multipart/byteranges" type (appendix 19.2) when it appears in a 206 + (Partial Content) response, which will be interpreted by some HTTP + caching mechanisms as described in sections 13.5.4 and 14.16. In all + other cases, an HTTP user agent SHOULD follow the same or similar + behavior as a MIME user agent would upon receipt of a multipart type. + The MIME header fields within each body-part of a multipart message- + body do not have any significance to HTTP beyond that defined by + their MIME semantics. + + In general, an HTTP user agent SHOULD follow the same or similar + behavior as a MIME user agent would upon receipt of a multipart type. + If an application receives an unrecognized multipart subtype, the + application MUST treat it as being equivalent to "multipart/mixed". + + Note: The "multipart/form-data" type has been specifically defined + for carrying form data suitable for processing via the POST + request method, as described in RFC 1867 [15]. + +3.8 Product Tokens + + Product tokens are used to allow communicating applications to + identify themselves by software name and version. Most fields using + product tokens also allow sub-products which form a significant part + of the application to be listed, separated by white space. By + convention, the products are listed in order of their significance + for identifying the application. + + product = token ["/" product-version] + product-version = token + + Examples: + + User-Agent: CERN-LineMode/2.15 libwww/2.17b3 + Server: Apache/0.8.4 + + + + + +Fielding, et al. Standards Track [Page 28] + +RFC 2616 HTTP/1.1 June 1999 + + + Product tokens SHOULD be short and to the point. They MUST NOT be + used for advertising or other non-essential information. 
Although any + token character MAY appear in a product-version, this token SHOULD + only be used for a version identifier (i.e., successive versions of + the same product SHOULD only differ in the product-version portion of + the product value). + +3.9 Quality Values + + HTTP content negotiation (section 12) uses short "floating point" + numbers to indicate the relative importance ("weight") of various + negotiable parameters. A weight is normalized to a real number in + the range 0 through 1, where 0 is the minimum and 1 the maximum + value. If a parameter has a quality value of 0, then content with + this parameter is `not acceptable' for the client. HTTP/1.1 + applications MUST NOT generate more than three digits after the + decimal point. User configuration of these values SHOULD also be + limited in this fashion. + + qvalue = ( "0" [ "." 0*3DIGIT ] ) + | ( "1" [ "." 0*3("0") ] ) + + "Quality values" is a misnomer, since these values merely represent + relative degradation in desired quality. + +3.10 Language Tags + + A language tag identifies a natural language spoken, written, or + otherwise conveyed by human beings for communication of information + to other human beings. Computer languages are explicitly excluded. + HTTP uses language tags within the Accept-Language and Content- + Language fields. + + The syntax and registry of HTTP language tags is the same as that + defined by RFC 1766 [1]. In summary, a language tag is composed of 1 + or more parts: A primary language tag and a possibly empty series of + subtags: + + language-tag = primary-tag *( "-" subtag ) + primary-tag = 1*8ALPHA + subtag = 1*8ALPHA + + White space is not allowed within the tag and all tags are case- + insensitive. The name space of language tags is administered by the + IANA. Example tags include: + + en, en-US, en-cockney, i-cherokee, x-pig-latin + + + + +Fielding, et al. 
Standards Track [Page 29] + +RFC 2616 HTTP/1.1 June 1999 + + + where any two-letter primary-tag is an ISO-639 language abbreviation + and any two-letter initial subtag is an ISO-3166 country code. (The + last three tags above are not registered tags; all but the last are + examples of tags which could be registered in future.) + +3.11 Entity Tags + + Entity tags are used for comparing two or more entities from the same + requested resource. HTTP/1.1 uses entity tags in the ETag (section + 14.19), If-Match (section 14.24), If-None-Match (section 14.26), and + If-Range (section 14.27) header fields. The definition of how they + are used and compared as cache validators is in section 13.3.3. An + entity tag consists of an opaque quoted string, possibly prefixed by + a weakness indicator. + + entity-tag = [ weak ] opaque-tag + weak = "W/" + opaque-tag = quoted-string + + A "strong entity tag" MAY be shared by two entities of a resource + only if they are equivalent by octet equality. + + A "weak entity tag," indicated by the "W/" prefix, MAY be shared by + two entities of a resource only if the entities are equivalent and + could be substituted for each other with no significant change in + semantics. A weak entity tag can only be used for weak comparison. + + An entity tag MUST be unique across all versions of all entities + associated with a particular resource. A given entity tag value MAY + be used for entities obtained by requests on different URIs. The use + of the same entity tag value in conjunction with entities obtained by + requests on different URIs does not imply the equivalence of those + entities. + +3.12 Range Units + + HTTP/1.1 allows a client to request that only part (a range of) the + response entity be included within the response. HTTP/1.1 uses range + units in the Range (section 14.35) and Content-Range (section 14.16) + header fields. An entity can be broken down into subranges according + to various structural units. 
+ + range-unit = bytes-unit | other-range-unit + bytes-unit = "bytes" + other-range-unit = token + + The only range unit defined by HTTP/1.1 is "bytes". HTTP/1.1 + implementations MAY ignore ranges specified using other units. + + + +Fielding, et al. Standards Track [Page 30] + +RFC 2616 HTTP/1.1 June 1999 + + + HTTP/1.1 has been designed to allow implementations of applications + that do not depend on knowledge of ranges. + +4 HTTP Message + +4.1 Message Types + + HTTP messages consist of requests from client to server and responses + from server to client. + + HTTP-message = Request | Response ; HTTP/1.1 messages + + Request (section 5) and Response (section 6) messages use the generic + message format of RFC 822 [9] for transferring entities (the payload + of the message). Both types of message consist of a start-line, zero + or more header fields (also known as "headers"), an empty line (i.e., + a line with nothing preceding the CRLF) indicating the end of the + header fields, and possibly a message-body. + + generic-message = start-line + *(message-header CRLF) + CRLF + [ message-body ] + start-line = Request-Line | Status-Line + + In the interest of robustness, servers SHOULD ignore any empty + line(s) received where a Request-Line is expected. In other words, if + the server is reading the protocol stream at the beginning of a + message and receives a CRLF first, it should ignore the CRLF. + + Certain buggy HTTP/1.0 client implementations generate extra CRLF's + after a POST request. To restate what is explicitly forbidden by the + BNF, an HTTP/1.1 client MUST NOT preface or follow a request with an + extra CRLF. + +4.2 Message Headers + + HTTP header fields, which include general-header (section 4.5), + request-header (section 5.3), response-header (section 6.2), and + entity-header (section 7.1) fields, follow the same generic format as + that given in Section 3.1 of RFC 822 [9]. 
Each header field consists + of a name followed by a colon (":") and the field value. Field names + are case-insensitive. The field value MAY be preceded by any amount + of LWS, though a single SP is preferred. Header fields can be + extended over multiple lines by preceding each extra line with at + least one SP or HT. Applications ought to follow "common form", where + one is known or indicated, when generating HTTP constructs, since + there might exist some implementations that fail to accept anything + + + +Fielding, et al. Standards Track [Page 31] + +RFC 2616 HTTP/1.1 June 1999 + + + beyond the common forms. + + message-header = field-name ":" [ field-value ] + field-name = token + field-value = *( field-content | LWS ) + field-content = <the OCTETs making up the field-value + and consisting of either *TEXT or combinations + of token, separators, and quoted-string> + + The field-content does not include any leading or trailing LWS: + linear white space occurring before the first non-whitespace + character of the field-value or after the last non-whitespace + character of the field-value. Such leading or trailing LWS MAY be + removed without changing the semantics of the field value. Any LWS + that occurs between field-content MAY be replaced with a single SP + before interpreting the field value or forwarding the message + downstream. + + The order in which header fields with differing field names are + received is not significant. However, it is "good practice" to send + general-header fields first, followed by request-header or response- + header fields, and ending with the entity-header fields. + + Multiple message-header fields with the same field-name MAY be + present in a message if and only if the entire field-value for that + header field is defined as a comma-separated list [i.e., #(values)]. + It MUST be possible to combine the multiple header fields into one + "field-name: field-value" pair, without changing the semantics of the + message, by appending each subsequent field-value to the first, each + separated by a comma. 
The order in which header fields with the same + field-name are received is therefore significant to the + interpretation of the combined field value, and thus a proxy MUST NOT + change the order of these field values when a message is forwarded. + +4.3 Message Body + + The message-body (if any) of an HTTP message is used to carry the + entity-body associated with the request or response. The message-body + differs from the entity-body only when a transfer-coding has been + applied, as indicated by the Transfer-Encoding header field (section + 14.41). + + message-body = entity-body + | <entity-body encoded as per Transfer-Encoding> + + Transfer-Encoding MUST be used to indicate any transfer-codings + applied by an application to ensure safe and proper transfer of the + message. Transfer-Encoding is a property of the message, not of the + + + +Fielding, et al. Standards Track [Page 32] + +RFC 2616 HTTP/1.1 June 1999 + + + entity, and thus MAY be added or removed by any application along the + request/response chain. (However, section 3.6 places restrictions on + when certain transfer-codings may be used.) + + The rules for when a message-body is allowed in a message differ for + requests and responses. + + The presence of a message-body in a request is signaled by the + inclusion of a Content-Length or Transfer-Encoding header field in + the request's message-headers. A message-body MUST NOT be included in + a request if the specification of the request method (section 5.1.1) + does not allow sending an entity-body in requests. A server SHOULD + read and forward a message-body on any request; if the request method + does not include defined semantics for an entity-body, then the + message-body SHOULD be ignored when handling the request. + + For response messages, whether or not a message-body is included with + a message is dependent on both the request method and the response + status code (section 6.1.1). 
All responses to the HEAD request method + MUST NOT include a message-body, even though the presence of entity- + header fields might lead one to believe they do. All 1xx + (informational), 204 (no content), and 304 (not modified) responses + MUST NOT include a message-body. All other responses do include a + message-body, although it MAY be of zero length. + +4.4 Message Length + + The transfer-length of a message is the length of the message-body as + it appears in the message; that is, after any transfer-codings have + been applied. When a message-body is included with a message, the + transfer-length of that body is determined by one of the following + (in order of precedence): + + 1.Any response message which "MUST NOT" include a message-body (such + as the 1xx, 204, and 304 responses and any response to a HEAD + request) is always terminated by the first empty line after the + header fields, regardless of the entity-header fields present in + the message. + + 2.If a Transfer-Encoding header field (section 14.41) is present and + has any value other than "identity", then the transfer-length is + defined by use of the "chunked" transfer-coding (section 3.6), + unless the message is terminated by closing the connection. + + 3.If a Content-Length header field (section 14.13) is present, its + decimal value in OCTETs represents both the entity-length and the + transfer-length. The Content-Length header field MUST NOT be sent + if these two lengths are different (i.e., if a Transfer-Encoding + + + +Fielding, et al. Standards Track [Page 33] + +RFC 2616 HTTP/1.1 June 1999 + + + header field is present). If a message is received with both a + Transfer-Encoding header field and a Content-Length header field, + the latter MUST be ignored. + + 4.If the message uses the media type "multipart/byteranges", and the + transfer-length is not otherwise specified, then this self- + delimiting media type defines the transfer-length. 
This media type + MUST NOT be used unless the sender knows that the recipient can parse + it; the presence in a request of a Range header with multiple byte- + range specifiers from a 1.1 client implies that the client can parse + multipart/byteranges responses. + + A range header might be forwarded by a 1.0 proxy that does not + understand multipart/byteranges; in this case the server MUST + delimit the message using methods defined in items 1,3 or 5 of + this section. + + 5.By the server closing the connection. (Closing the connection + cannot be used to indicate the end of a request body, since that + would leave no possibility for the server to send back a response.) + + For compatibility with HTTP/1.0 applications, HTTP/1.1 requests + containing a message-body MUST include a valid Content-Length header + field unless the server is known to be HTTP/1.1 compliant. If a + request contains a message-body and a Content-Length is not given, + the server SHOULD respond with 400 (bad request) if it cannot + determine the length of the message, or with 411 (length required) if + it wishes to insist on receiving a valid Content-Length. + + All HTTP/1.1 applications that receive entities MUST accept the + "chunked" transfer-coding (section 3.6), thus allowing this mechanism + to be used for messages when the message length cannot be determined + in advance. + + Messages MUST NOT include both a Content-Length header field and a + non-identity transfer-coding. If the message does include a non- + identity transfer-coding, the Content-Length MUST be ignored. + + When a Content-Length is given in a message where a message-body is + allowed, its field value MUST exactly match the number of OCTETs in + the message-body. HTTP/1.1 user agents MUST notify the user when an + invalid length is received and detected. 
+ +4.5 General Header Fields + + There are a few header fields which have general applicability for + both request and response messages, but which do not apply to the + entity being transferred. These header fields apply only to the + + + +Fielding, et al. Standards Track [Page 34] + +RFC 2616 HTTP/1.1 June 1999 + + + message being transmitted. + + general-header = Cache-Control ; Section 14.9 + | Connection ; Section 14.10 + | Date ; Section 14.18 + | Pragma ; Section 14.32 + | Trailer ; Section 14.40 + | Transfer-Encoding ; Section 14.41 + | Upgrade ; Section 14.42 + | Via ; Section 14.45 + | Warning ; Section 14.46 + + General-header field names can be extended reliably only in + combination with a change in the protocol version. However, new or + experimental header fields may be given the semantics of general + header fields if all parties in the communication recognize them to + be general-header fields. Unrecognized header fields are treated as + entity-header fields. + +5 Request + + A request message from a client to a server includes, within the + first line of that message, the method to be applied to the resource, + the identifier of the resource, and the protocol version in use. + + Request = Request-Line ; Section 5.1 + *(( general-header ; Section 4.5 + | request-header ; Section 5.3 + | entity-header ) CRLF) ; Section 7.1 + CRLF + [ message-body ] ; Section 4.3 + +5.1 Request-Line + + The Request-Line begins with a method token, followed by the + Request-URI and the protocol version, and ending with CRLF. The + elements are separated by SP characters. No CR or LF is allowed + except in the final CRLF sequence. + + Request-Line = Method SP Request-URI SP HTTP-Version CRLF + + + + + + + + + + + +Fielding, et al. Standards Track [Page 35] + +RFC 2616 HTTP/1.1 June 1999 + + +5.1.1 Method + + The Method token indicates the method to be performed on the + resource identified by the Request-URI. The method is case-sensitive. 
+ + Method = "OPTIONS" ; Section 9.2 + | "GET" ; Section 9.3 + | "HEAD" ; Section 9.4 + | "POST" ; Section 9.5 + | "PUT" ; Section 9.6 + | "DELETE" ; Section 9.7 + | "TRACE" ; Section 9.8 + | "CONNECT" ; Section 9.9 + | extension-method + extension-method = token + + The list of methods allowed by a resource can be specified in an + Allow header field (section 14.7). The return code of the response + always notifies the client whether a method is currently allowed on a + resource, since the set of allowed methods can change dynamically. An + origin server SHOULD return the status code 405 (Method Not Allowed) + if the method is known by the origin server but not allowed for the + requested resource, and 501 (Not Implemented) if the method is + unrecognized or not implemented by the origin server. The methods GET + and HEAD MUST be supported by all general-purpose servers. All other + methods are OPTIONAL; however, if the above methods are implemented, + they MUST be implemented with the same semantics as those specified + in section 9. + +5.1.2 Request-URI + + The Request-URI is a Uniform Resource Identifier (section 3.2) and + identifies the resource upon which to apply the request. + + Request-URI = "*" | absoluteURI | abs_path | authority + + The four options for Request-URI are dependent on the nature of the + request. The asterisk "*" means that the request does not apply to a + particular resource, but to the server itself, and is only allowed + when the method used does not necessarily apply to a resource. One + example would be + + OPTIONS * HTTP/1.1 + + The absoluteURI form is REQUIRED when the request is being made to a + proxy. The proxy is requested to forward the request or service it + from a valid cache, and return the response. Note that the proxy MAY + forward the request on to another proxy or directly to the server + + + +Fielding, et al. Standards Track [Page 36] + +RFC 2616 HTTP/1.1 June 1999 + + + specified by the absoluteURI. 
In order to avoid request loops, a + proxy MUST be able to recognize all of its server names, including + any aliases, local variations, and the numeric IP address. An example + Request-Line would be: + + GET http://www.w3.org/pub/WWW/TheProject.html HTTP/1.1 + + To allow for transition to absoluteURIs in all requests in future + versions of HTTP, all HTTP/1.1 servers MUST accept the absoluteURI + form in requests, even though HTTP/1.1 clients will only generate + them in requests to proxies. + + The authority form is only used by the CONNECT method (section 9.9). + + The most common form of Request-URI is that used to identify a + resource on an origin server or gateway. In this case the absolute + path of the URI MUST be transmitted (see section 3.2.1, abs_path) as + the Request-URI, and the network location of the URI (authority) MUST + be transmitted in a Host header field. For example, a client wishing + to retrieve the resource above directly from the origin server would + create a TCP connection to port 80 of the host "www.w3.org" and send + the lines: + + GET /pub/WWW/TheProject.html HTTP/1.1 + Host: www.w3.org + + followed by the remainder of the Request. Note that the absolute path + cannot be empty; if none is present in the original URI, it MUST be + given as "/" (the server root). + + The Request-URI is transmitted in the format specified in section + 3.2.1. If the Request-URI is encoded using the "% HEX HEX" encoding + [42], the origin server MUST decode the Request-URI in order to + properly interpret the request. Servers SHOULD respond to invalid + Request-URIs with an appropriate status code. + + A transparent proxy MUST NOT rewrite the "abs_path" part of the + received Request-URI when forwarding it to the next inbound server, + except as noted above to replace a null abs_path with "/". 
+ + Note: The "no rewrite" rule prevents the proxy from changing the + meaning of the request when the origin server is improperly using + a non-reserved URI character for a reserved purpose. Implementors + should be aware that some pre-HTTP/1.1 proxies have been known to + rewrite the Request-URI. + + + + + + +Fielding, et al. Standards Track [Page 37] + +RFC 2616 HTTP/1.1 June 1999 + + +5.2 The Resource Identified by a Request + + The exact resource identified by an Internet request is determined by + examining both the Request-URI and the Host header field. + + An origin server that does not allow resources to differ by the + requested host MAY ignore the Host header field value when + determining the resource identified by an HTTP/1.1 request. (But see + section 19.6.1.1 for other requirements on Host support in HTTP/1.1.) + + An origin server that does differentiate resources based on the host + requested (sometimes referred to as virtual hosts or vanity host + names) MUST use the following rules for determining the requested + resource on an HTTP/1.1 request: + + 1. If Request-URI is an absoluteURI, the host is part of the + Request-URI. Any Host header field value in the request MUST be + ignored. + + 2. If the Request-URI is not an absoluteURI, and the request includes + a Host header field, the host is determined by the Host header + field value. + + 3. If the host as determined by rule 1 or 2 is not a valid host on + the server, the response MUST be a 400 (Bad Request) error message. + + Recipients of an HTTP/1.0 request that lacks a Host header field MAY + attempt to use heuristics (e.g., examination of the URI path for + something unique to a particular host) in order to determine what + exact resource is being requested. + +5.3 Request Header Fields + + The request-header fields allow the client to pass additional + information about the request, and about the client itself, to the + server. 
These fields act as request modifiers, with semantics + equivalent to the parameters on a programming language method + invocation. + + request-header = Accept ; Section 14.1 + | Accept-Charset ; Section 14.2 + | Accept-Encoding ; Section 14.3 + | Accept-Language ; Section 14.4 + | Authorization ; Section 14.8 + | Expect ; Section 14.20 + | From ; Section 14.22 + | Host ; Section 14.23 + | If-Match ; Section 14.24 + + + +Fielding, et al. Standards Track [Page 38] + +RFC 2616 HTTP/1.1 June 1999 + + + | If-Modified-Since ; Section 14.25 + | If-None-Match ; Section 14.26 + | If-Range ; Section 14.27 + | If-Unmodified-Since ; Section 14.28 + | Max-Forwards ; Section 14.31 + | Proxy-Authorization ; Section 14.34 + | Range ; Section 14.35 + | Referer ; Section 14.36 + | TE ; Section 14.39 + | User-Agent ; Section 14.43 + + Request-header field names can be extended reliably only in + combination with a change in the protocol version. However, new or + experimental header fields MAY be given the semantics of request- + header fields if all parties in the communication recognize them to + be request-header fields. Unrecognized header fields are treated as + entity-header fields. + +6 Response + + After receiving and interpreting a request message, a server responds + with an HTTP response message. + + Response = Status-Line ; Section 6.1 + *(( general-header ; Section 4.5 + | response-header ; Section 6.2 + | entity-header ) CRLF) ; Section 7.1 + CRLF + [ message-body ] ; Section 7.2 + +6.1 Status-Line + + The first line of a Response message is the Status-Line, consisting + of the protocol version followed by a numeric status code and its + associated textual phrase, with each element separated by SP + characters. No CR or LF is allowed except in the final CRLF sequence. 
+ + Status-Line = HTTP-Version SP Status-Code SP Reason-Phrase CRLF + +6.1.1 Status Code and Reason Phrase + + The Status-Code element is a 3-digit integer result code of the + attempt to understand and satisfy the request. These codes are fully + defined in section 10. The Reason-Phrase is intended to give a short + textual description of the Status-Code. The Status-Code is intended + for use by automata and the Reason-Phrase is intended for the human + user. The client is not required to examine or display the Reason- + Phrase. + + + +Fielding, et al. Standards Track [Page 39] + +RFC 2616 HTTP/1.1 June 1999 + + + The first digit of the Status-Code defines the class of response. The + last two digits do not have any categorization role. There are 5 + values for the first digit: + + - 1xx: Informational - Request received, continuing process + + - 2xx: Success - The action was successfully received, + understood, and accepted + + - 3xx: Redirection - Further action must be taken in order to + complete the request + + - 4xx: Client Error - The request contains bad syntax or cannot + be fulfilled + + - 5xx: Server Error - The server failed to fulfill an apparently + valid request + + The individual values of the numeric status codes defined for + HTTP/1.1, and an example set of corresponding Reason-Phrase's, are + presented below. The reason phrases listed here are only + recommendations -- they MAY be replaced by local equivalents without + affecting the protocol. 
+ + Status-Code = + "100" ; Section 10.1.1: Continue + | "101" ; Section 10.1.2: Switching Protocols + | "200" ; Section 10.2.1: OK + | "201" ; Section 10.2.2: Created + | "202" ; Section 10.2.3: Accepted + | "203" ; Section 10.2.4: Non-Authoritative Information + | "204" ; Section 10.2.5: No Content + | "205" ; Section 10.2.6: Reset Content + | "206" ; Section 10.2.7: Partial Content + | "300" ; Section 10.3.1: Multiple Choices + | "301" ; Section 10.3.2: Moved Permanently + | "302" ; Section 10.3.3: Found + | "303" ; Section 10.3.4: See Other + | "304" ; Section 10.3.5: Not Modified + | "305" ; Section 10.3.6: Use Proxy + | "307" ; Section 10.3.8: Temporary Redirect + | "400" ; Section 10.4.1: Bad Request + | "401" ; Section 10.4.2: Unauthorized + | "402" ; Section 10.4.3: Payment Required + | "403" ; Section 10.4.4: Forbidden + | "404" ; Section 10.4.5: Not Found + | "405" ; Section 10.4.6: Method Not Allowed + | "406" ; Section 10.4.7: Not Acceptable + + + +Fielding, et al. Standards Track [Page 40] + +RFC 2616 HTTP/1.1 June 1999 + + + | "407" ; Section 10.4.8: Proxy Authentication Required + | "408" ; Section 10.4.9: Request Time-out + | "409" ; Section 10.4.10: Conflict + | "410" ; Section 10.4.11: Gone + | "411" ; Section 10.4.12: Length Required + | "412" ; Section 10.4.13: Precondition Failed + | "413" ; Section 10.4.14: Request Entity Too Large + | "414" ; Section 10.4.15: Request-URI Too Large + | "415" ; Section 10.4.16: Unsupported Media Type + | "416" ; Section 10.4.17: Requested range not satisfiable + | "417" ; Section 10.4.18: Expectation Failed + | "500" ; Section 10.5.1: Internal Server Error + | "501" ; Section 10.5.2: Not Implemented + | "502" ; Section 10.5.3: Bad Gateway + | "503" ; Section 10.5.4: Service Unavailable + | "504" ; Section 10.5.5: Gateway Time-out + | "505" ; Section 10.5.6: HTTP Version not supported + | extension-code + + extension-code = 3DIGIT + Reason-Phrase = *<TEXT, excluding CR, LF> + + HTTP status codes are extensible.
HTTP applications are not required + to understand the meaning of all registered status codes, though such + understanding is obviously desirable. However, applications MUST + understand the class of any status code, as indicated by the first + digit, and treat any unrecognized response as being equivalent to the + x00 status code of that class, with the exception that an + unrecognized response MUST NOT be cached. For example, if an + unrecognized status code of 431 is received by the client, it can + safely assume that there was something wrong with its request and + treat the response as if it had received a 400 status code. In such + cases, user agents SHOULD present to the user the entity returned + with the response, since that entity is likely to include human- + readable information which will explain the unusual status. + +6.2 Response Header Fields + + The response-header fields allow the server to pass additional + information about the response which cannot be placed in the Status- + Line. These header fields give information about the server and about + further access to the resource identified by the Request-URI. + + response-header = Accept-Ranges ; Section 14.5 + | Age ; Section 14.6 + | ETag ; Section 14.19 + | Location ; Section 14.30 + | Proxy-Authenticate ; Section 14.33 + + + +Fielding, et al. Standards Track [Page 41] + +RFC 2616 HTTP/1.1 June 1999 + + + | Retry-After ; Section 14.37 + | Server ; Section 14.38 + | Vary ; Section 14.44 + | WWW-Authenticate ; Section 14.47 + + Response-header field names can be extended reliably only in + combination with a change in the protocol version. However, new or + experimental header fields MAY be given the semantics of response- + header fields if all parties in the communication recognize them to + be response-header fields. Unrecognized header fields are treated as + entity-header fields. 
+ +7 Entity + + Request and Response messages MAY transfer an entity if not otherwise + restricted by the request method or response status code. An entity + consists of entity-header fields and an entity-body, although some + responses will only include the entity-headers. + + In this section, both sender and recipient refer to either the client + or the server, depending on who sends and who receives the entity. + +7.1 Entity Header Fields + + Entity-header fields define metainformation about the entity-body or, + if no body is present, about the resource identified by the request. + Some of this metainformation is OPTIONAL; some might be REQUIRED by + portions of this specification. + + entity-header = Allow ; Section 14.7 + | Content-Encoding ; Section 14.11 + | Content-Language ; Section 14.12 + | Content-Length ; Section 14.13 + | Content-Location ; Section 14.14 + | Content-MD5 ; Section 14.15 + | Content-Range ; Section 14.16 + | Content-Type ; Section 14.17 + | Expires ; Section 14.21 + | Last-Modified ; Section 14.29 + | extension-header + + extension-header = message-header + + The extension-header mechanism allows additional entity-header fields + to be defined without changing the protocol, but these fields cannot + be assumed to be recognizable by the recipient. Unrecognized header + fields SHOULD be ignored by the recipient and MUST be forwarded by + transparent proxies. + + + +Fielding, et al. Standards Track [Page 42] + +RFC 2616 HTTP/1.1 June 1999 + + +7.2 Entity Body + + The entity-body (if any) sent with an HTTP request or response is in + a format and encoding defined by the entity-header fields. + + entity-body = *OCTET + + An entity-body is only present in a message when a message-body is + present, as described in section 4.3. The entity-body is obtained + from the message-body by decoding any Transfer-Encoding that might + have been applied to ensure safe and proper transfer of the message. 
+ +7.2.1 Type + + When an entity-body is included with a message, the data type of that + body is determined via the header fields Content-Type and Content- + Encoding. These define a two-layer, ordered encoding model: + + entity-body := Content-Encoding( Content-Type( data ) ) + + Content-Type specifies the media type of the underlying data. + Content-Encoding may be used to indicate any additional content + codings applied to the data, usually for the purpose of data + compression, that are a property of the requested resource. There is + no default encoding. + + Any HTTP/1.1 message containing an entity-body SHOULD include a + Content-Type header field defining the media type of that body. If + and only if the media type is not given by a Content-Type field, the + recipient MAY attempt to guess the media type via inspection of its + content and/or the name extension(s) of the URI used to identify the + resource. If the media type remains unknown, the recipient SHOULD + treat it as type "application/octet-stream". + +7.2.2 Entity Length + + The entity-length of a message is the length of the message-body + before any transfer-codings have been applied. Section 4.4 defines + how the transfer-length of a message-body is determined. + + + + + + + + + + + + +Fielding, et al. Standards Track [Page 43] + +RFC 2616 HTTP/1.1 June 1999 + + +8 Connections + +8.1 Persistent Connections + +8.1.1 Purpose + + Prior to persistent connections, a separate TCP connection was + established to fetch each URL, increasing the load on HTTP servers + and causing congestion on the Internet. The use of inline images and + other associated data often requires a client to make multiple + requests of the same server in a short amount of time. Analysis of + these performance problems and results from a prototype + implementation are available [26] [30]. Implementation experience and + measurements of actual HTTP/1.1 (RFC 2068) implementations show good + results [39].
Alternatives have also been explored, for example, + T/TCP [27]. + + Persistent HTTP connections have a number of advantages: + + - By opening and closing fewer TCP connections, CPU time is saved + in routers and hosts (clients, servers, proxies, gateways, + tunnels, or caches), and memory used for TCP protocol control + blocks can be saved in hosts. + + - HTTP requests and responses can be pipelined on a connection. + Pipelining allows a client to make multiple requests without + waiting for each response, allowing a single TCP connection to + be used much more efficiently, with much lower elapsed time. + + - Network congestion is reduced by reducing the number of packets + caused by TCP opens, and by allowing TCP sufficient time to + determine the congestion state of the network. + + - Latency on subsequent requests is reduced since there is no time + spent in TCP's connection opening handshake. + + - HTTP can evolve more gracefully, since errors can be reported + without the penalty of closing the TCP connection. Clients using + future versions of HTTP might optimistically try a new feature, + but if communicating with an older server, retry with old + semantics after an error is reported. + + HTTP implementations SHOULD implement persistent connections. + + + + + + + + +Fielding, et al. Standards Track [Page 44] + +RFC 2616 HTTP/1.1 June 1999 + + +8.1.2 Overall Operation + + A significant difference between HTTP/1.1 and earlier versions of + HTTP is that persistent connections are the default behavior of any + HTTP connection. That is, unless otherwise indicated, the client + SHOULD assume that the server will maintain a persistent connection, + even after error responses from the server. + + Persistent connections provide a mechanism by which a client and a + server can signal the close of a TCP connection. This signaling takes + place using the Connection header field (section 14.10). 
Once a close + has been signaled, the client MUST NOT send any more requests on that + connection. + +8.1.2.1 Negotiation + + An HTTP/1.1 server MAY assume that a HTTP/1.1 client intends to + maintain a persistent connection unless a Connection header including + the connection-token "close" was sent in the request. If the server + chooses to close the connection immediately after sending the + response, it SHOULD send a Connection header including the + connection-token close. + + An HTTP/1.1 client MAY expect a connection to remain open, but would + decide to keep it open based on whether the response from a server + contains a Connection header with the connection-token close. In case + the client does not want to maintain a connection for more than that + request, it SHOULD send a Connection header including the + connection-token close. + + If either the client or the server sends the close token in the + Connection header, that request becomes the last one for the + connection. + + Clients and servers SHOULD NOT assume that a persistent connection is + maintained for HTTP versions less than 1.1 unless it is explicitly + signaled. See section 19.6.2 for more information on backward + compatibility with HTTP/1.0 clients. + + In order to remain persistent, all messages on the connection MUST + have a self-defined message length (i.e., one not defined by closure + of the connection), as described in section 4.4. + + + + + + + + + +Fielding, et al. Standards Track [Page 45] + +RFC 2616 HTTP/1.1 June 1999 + + +8.1.2.2 Pipelining + + A client that supports persistent connections MAY "pipeline" its + requests (i.e., send multiple requests without waiting for each + response). A server MUST send its responses to those requests in the + same order that the requests were received. + + Clients which assume persistent connections and pipeline immediately + after connection establishment SHOULD be prepared to retry their + connection if the first pipelined attempt fails. 
If a client does + such a retry, it MUST NOT pipeline before it knows the connection is + persistent. Clients MUST also be prepared to resend their requests if + the server closes the connection before sending all of the + corresponding responses. + + Clients SHOULD NOT pipeline requests using non-idempotent methods or + non-idempotent sequences of methods (see section 9.1.2). Otherwise, a + premature termination of the transport connection could lead to + indeterminate results. A client wishing to send a non-idempotent + request SHOULD wait to send that request until it has received the + response status for the previous request. + +8.1.3 Proxy Servers + + It is especially important that proxies correctly implement the + properties of the Connection header field as specified in section + 14.10. + + The proxy server MUST signal persistent connections separately with + its clients and the origin servers (or other proxy servers) that it + connects to. Each persistent connection applies to only one transport + link. + + A proxy server MUST NOT establish a HTTP/1.1 persistent connection + with an HTTP/1.0 client (but see RFC 2068 [33] for information and + discussion of the problems with the Keep-Alive header implemented by + many HTTP/1.0 clients). + +8.1.4 Practical Considerations + + Servers will usually have some time-out value beyond which they will + no longer maintain an inactive connection. Proxy servers might make + this a higher value since it is likely that the client will be making + more connections through the same server. The use of persistent + connections places no requirements on the length (or existence) of + this time-out for either the client or the server. + + + + + +Fielding, et al. Standards Track [Page 46] + +RFC 2616 HTTP/1.1 June 1999 + + + When a client or server wishes to time-out it SHOULD issue a graceful + close on the transport connection. 
Clients and servers SHOULD both + constantly watch for the other side of the transport close, and + respond to it as appropriate. If a client or server does not detect + the other side's close promptly it could cause unnecessary resource + drain on the network. + + A client, server, or proxy MAY close the transport connection at any + time. For example, a client might have started to send a new request + at the same time that the server has decided to close the "idle" + connection. From the server's point of view, the connection is being + closed while it was idle, but from the client's point of view, a + request is in progress. + + This means that clients, servers, and proxies MUST be able to recover + from asynchronous close events. Client software SHOULD reopen the + transport connection and retransmit the aborted sequence of requests + without user interaction so long as the request sequence is + idempotent (see section 9.1.2). Non-idempotent methods or sequences + MUST NOT be automatically retried, although user agents MAY offer a + human operator the choice of retrying the request(s). Confirmation by + user-agent software with semantic understanding of the application + MAY substitute for user confirmation. The automatic retry SHOULD NOT + be repeated if the second sequence of requests fails. + + Servers SHOULD always respond to at least one request per connection, + if at all possible. Servers SHOULD NOT close a connection in the + middle of transmitting a response, unless a network or client failure + is suspected. + + Clients that use persistent connections SHOULD limit the number of + simultaneous connections that they maintain to a given server. A + single-user client SHOULD NOT maintain more than 2 connections with + any server or proxy. A proxy SHOULD use up to 2*N connections to + another server or proxy, where N is the number of simultaneously + active users. These guidelines are intended to improve HTTP response + times and avoid congestion. 
+ +8.2 Message Transmission Requirements + +8.2.1 Persistent Connections and Flow Control + + HTTP/1.1 servers SHOULD maintain persistent connections and use TCP's + flow control mechanisms to resolve temporary overloads, rather than + terminating connections with the expectation that clients will retry. + The latter technique can exacerbate network congestion. + + + + + +Fielding, et al. Standards Track [Page 47] + +RFC 2616 HTTP/1.1 June 1999 + + +8.2.2 Monitoring Connections for Error Status Messages + + An HTTP/1.1 (or later) client sending a message-body SHOULD monitor + the network connection for an error status while it is transmitting + the request. If the client sees an error status, it SHOULD + immediately cease transmitting the body. If the body is being sent + using a "chunked" encoding (section 3.6), a zero length chunk and + empty trailer MAY be used to prematurely mark the end of the message. + If the body was preceded by a Content-Length header, the client MUST + close the connection. + +8.2.3 Use of the 100 (Continue) Status + + The purpose of the 100 (Continue) status (see section 10.1.1) is to + allow a client that is sending a request message with a request body + to determine if the origin server is willing to accept the request + (based on the request headers) before the client sends the request + body. In some cases, it might either be inappropriate or highly + inefficient for the client to send the body if the server will reject + the message without looking at the body. + + Requirements for HTTP/1.1 clients: + + - If a client will wait for a 100 (Continue) response before + sending the request body, it MUST send an Expect request-header + field (section 14.20) with the "100-continue" expectation. + + - A client MUST NOT send an Expect request-header field (section + 14.20) with the "100-continue" expectation if it does not intend + to send a request body. 
+ + Because of the presence of older implementations, the protocol allows + ambiguous situations in which a client may send "Expect: 100- + continue" without receiving either a 417 (Expectation Failed) status + or a 100 (Continue) status. Therefore, when a client sends this + header field to an origin server (possibly via a proxy) from which it + has never seen a 100 (Continue) status, the client SHOULD NOT wait + for an indefinite period before sending the request body. + + Requirements for HTTP/1.1 origin servers: + + - Upon receiving a request which includes an Expect request-header + field with the "100-continue" expectation, an origin server MUST + either respond with 100 (Continue) status and continue to read + from the input stream, or respond with a final status code. The + origin server MUST NOT wait for the request body before sending + the 100 (Continue) response. If it responds with a final status + code, it MAY close the transport connection or it MAY continue + + + +Fielding, et al. Standards Track [Page 48] + +RFC 2616 HTTP/1.1 June 1999 + + + to read and discard the rest of the request. It MUST NOT + perform the requested method if it returns a final status code. + + - An origin server SHOULD NOT send a 100 (Continue) response if + the request message does not include an Expect request-header + field with the "100-continue" expectation, and MUST NOT send a + 100 (Continue) response if such a request comes from an HTTP/1.0 + (or earlier) client. There is an exception to this rule: for + compatibility with RFC 2068, a server MAY send a 100 (Continue) + status in response to an HTTP/1.1 PUT or POST request that does + not include an Expect request-header field with the "100- + continue" expectation. This exception, the purpose of which is + to minimize any client processing delays associated with an + undeclared wait for 100 (Continue) status, applies only to + HTTP/1.1 requests, and not to requests with any other HTTP- + version value. 
+ + - An origin server MAY omit a 100 (Continue) response if it has + already received some or all of the request body for the + corresponding request. + + - An origin server that sends a 100 (Continue) response MUST + ultimately send a final status code, once the request body is + received and processed, unless it terminates the transport + connection prematurely. + + - If an origin server receives a request that does not include an + Expect request-header field with the "100-continue" expectation, + the request includes a request body, and the server responds + with a final status code before reading the entire request body + from the transport connection, then the server SHOULD NOT close + the transport connection until it has read the entire request, + or until the client closes the connection. Otherwise, the client + might not reliably receive the response message. However, this + requirement is not to be construed as preventing a server from + defending itself against denial-of-service attacks, or from + badly broken client implementations. + + Requirements for HTTP/1.1 proxies: + + - If a proxy receives a request that includes an Expect request- + header field with the "100-continue" expectation, and the proxy + either knows that the next-hop server complies with HTTP/1.1 or + higher, or does not know the HTTP version of the next-hop + server, it MUST forward the request, including the Expect header + field. + + + + + +Fielding, et al. Standards Track [Page 49] + +RFC 2616 HTTP/1.1 June 1999 + + + - If the proxy knows that the version of the next-hop server is + HTTP/1.0 or lower, it MUST NOT forward the request, and it MUST + respond with a 417 (Expectation Failed) status. + + - Proxies SHOULD maintain a cache recording the HTTP version + numbers received from recently-referenced next-hop servers.
+ + - A proxy MUST NOT forward a 100 (Continue) response if the + request message was received from an HTTP/1.0 (or earlier) + client and did not include an Expect request-header field with + the "100-continue" expectation. This requirement overrides the + general rule for forwarding of 1xx responses (see section 10.1). + +8.2.4 Client Behavior if Server Prematurely Closes Connection + + If an HTTP/1.1 client sends a request which includes a request body, + but which does not include an Expect request-header field with the + "100-continue" expectation, and if the client is not directly + connected to an HTTP/1.1 origin server, and if the client sees the + connection close before receiving any status from the server, the + client SHOULD retry the request. If the client does retry this + request, it MAY use the following "binary exponential backoff" + algorithm to be assured of obtaining a reliable response: + + 1. Initiate a new connection to the server + + 2. Transmit the request-headers + + 3. Initialize a variable R to the estimated round-trip time to the + server (e.g., based on the time it took to establish the + connection), or to a constant value of 5 seconds if the round- + trip time is not available. + + 4. Compute T = R * (2**N), where N is the number of previous + retries of this request. + + 5. Wait either for an error response from the server, or for T + seconds (whichever comes first) + + 6. If no error response is received, after T seconds transmit the + body of the request. + + 7. If client sees that the connection is closed prematurely, + repeat from step 1 until the request is accepted, an error + response is received, or the user becomes impatient and + terminates the retry process. + + + + + +Fielding, et al. Standards Track [Page 50] + +RFC 2616 HTTP/1.1 June 1999 + + + If at any point an error status is received, the client + + - SHOULD NOT continue and + + - SHOULD close the connection if it has not completed sending the + request message. 
+ +9 Method Definitions + + The set of common methods for HTTP/1.1 is defined below. Although + this set can be expanded, additional methods cannot be assumed to + share the same semantics for separately extended clients and servers. + + The Host request-header field (section 14.23) MUST accompany all + HTTP/1.1 requests. + +9.1 Safe and Idempotent Methods + +9.1.1 Safe Methods + + Implementors should be aware that the software represents the user in + their interactions over the Internet, and should be careful to allow + the user to be aware of any actions they might take which may have an + unexpected significance to themselves or others. + + In particular, the convention has been established that the GET and + HEAD methods SHOULD NOT have the significance of taking an action + other than retrieval. These methods ought to be considered "safe". + This allows user agents to represent other methods, such as POST, PUT + and DELETE, in a special way, so that the user is made aware of the + fact that a possibly unsafe action is being requested. + + Naturally, it is not possible to ensure that the server does not + generate side-effects as a result of performing a GET request; in + fact, some dynamic resources consider that a feature. The important + distinction here is that the user did not request the side-effects, + so therefore cannot be held accountable for them. + +9.1.2 Idempotent Methods + + Methods can also have the property of "idempotence" in that (aside + from error or expiration issues) the side-effects of N > 0 identical + requests is the same as for a single request. The methods GET, HEAD, + PUT and DELETE share this property. Also, the methods OPTIONS and + TRACE SHOULD NOT have side effects, and so are inherently idempotent. + + + + + + +Fielding, et al. 
Standards Track [Page 51] + +RFC 2616 HTTP/1.1 June 1999 + + + However, it is possible that a sequence of several requests is non- + idempotent, even if all of the methods executed in that sequence are + idempotent. (A sequence is idempotent if a single execution of the + entire sequence always yields a result that is not changed by a + reexecution of all, or part, of that sequence.) For example, a + sequence is non-idempotent if its result depends on a value that is + later modified in the same sequence. + + A sequence that never has side effects is idempotent, by definition + (provided that no concurrent operations are being executed on the + same set of resources). + +9.2 OPTIONS + + The OPTIONS method represents a request for information about the + communication options available on the request/response chain + identified by the Request-URI. This method allows the client to + determine the options and/or requirements associated with a resource, + or the capabilities of a server, without implying a resource action + or initiating a resource retrieval. + + Responses to this method are not cacheable. + + If the OPTIONS request includes an entity-body (as indicated by the + presence of Content-Length or Transfer-Encoding), then the media type + MUST be indicated by a Content-Type field. Although this + specification does not define any use for such a body, future + extensions to HTTP might use the OPTIONS body to make more detailed + queries on the server. A server that does not support such an + extension MAY discard the request body. + + If the Request-URI is an asterisk ("*"), the OPTIONS request is + intended to apply to the server in general rather than to a specific + resource. Since a server's communication options typically depend on + the resource, the "*" request is only useful as a "ping" or "no-op" + type of method; it does nothing beyond allowing the client to test + the capabilities of the server. 
For example, this can be used to test + a proxy for HTTP/1.1 compliance (or lack thereof). + + If the Request-URI is not an asterisk, the OPTIONS request applies + only to the options that are available when communicating with that + resource. + + A 200 response SHOULD include any header fields that indicate + optional features implemented by the server and applicable to that + resource (e.g., Allow), possibly including extensions not defined by + this specification. The response body, if any, SHOULD also include + information about the communication options. The format for such a + + + +Fielding, et al. Standards Track [Page 52] + +RFC 2616 HTTP/1.1 June 1999 + + + body is not defined by this specification, but might be defined by + future extensions to HTTP. Content negotiation MAY be used to select + the appropriate response format. If no response body is included, the + response MUST include a Content-Length field with a field-value of + "0". + + The Max-Forwards request-header field MAY be used to target a + specific proxy in the request chain. When a proxy receives an OPTIONS + request on an absoluteURI for which request forwarding is permitted, + the proxy MUST check for a Max-Forwards field. If the Max-Forwards + field-value is zero ("0"), the proxy MUST NOT forward the message; + instead, the proxy SHOULD respond with its own communication options. + If the Max-Forwards field-value is an integer greater than zero, the + proxy MUST decrement the field-value when it forwards the request. If + no Max-Forwards field is present in the request, then the forwarded + request MUST NOT include a Max-Forwards field. + +9.3 GET + + The GET method means retrieve whatever information (in the form of an + entity) is identified by the Request-URI. 
If the Request-URI refers + to a data-producing process, it is the produced data which shall be + returned as the entity in the response and not the source text of the + process, unless that text happens to be the output of the process. + + The semantics of the GET method change to a "conditional GET" if the + request message includes an If-Modified-Since, If-Unmodified-Since, + If-Match, If-None-Match, or If-Range header field. A conditional GET + method requests that the entity be transferred only under the + circumstances described by the conditional header field(s). The + conditional GET method is intended to reduce unnecessary network + usage by allowing cached entities to be refreshed without requiring + multiple requests or transferring data already held by the client. + + The semantics of the GET method change to a "partial GET" if the + request message includes a Range header field. A partial GET requests + that only part of the entity be transferred, as described in section + 14.35. The partial GET method is intended to reduce unnecessary + network usage by allowing partially-retrieved entities to be + completed without transferring data already held by the client. + + The response to a GET request is cacheable if and only if it meets + the requirements for HTTP caching described in section 13. + + See section 15.1.3 for security considerations when used for forms. + + + + + + +Fielding, et al. Standards Track [Page 53] + +RFC 2616 HTTP/1.1 June 1999 + + +9.4 HEAD + + The HEAD method is identical to GET except that the server MUST NOT + return a message-body in the response. The metainformation contained + in the HTTP headers in response to a HEAD request SHOULD be identical + to the information sent in response to a GET request. This method can + be used for obtaining metainformation about the entity implied by the + request without transferring the entity-body itself. 
This method is + often used for testing hypertext links for validity, accessibility, + and recent modification. + + The response to a HEAD request MAY be cacheable in the sense that the + information contained in the response MAY be used to update a + previously cached entity from that resource. If the new field values + indicate that the cached entity differs from the current entity (as + would be indicated by a change in Content-Length, Content-MD5, ETag + or Last-Modified), then the cache MUST treat the cache entry as + stale. + +9.5 POST + + The POST method is used to request that the origin server accept the + entity enclosed in the request as a new subordinate of the resource + identified by the Request-URI in the Request-Line. POST is designed + to allow a uniform method to cover the following functions: + + - Annotation of existing resources; + + - Posting a message to a bulletin board, newsgroup, mailing list, + or similar group of articles; + + - Providing a block of data, such as the result of submitting a + form, to a data-handling process; + + - Extending a database through an append operation. + + The actual function performed by the POST method is determined by the + server and is usually dependent on the Request-URI. The posted entity + is subordinate to that URI in the same way that a file is subordinate + to a directory containing it, a news article is subordinate to a + newsgroup to which it is posted, or a record is subordinate to a + database. + + The action performed by the POST method might not result in a + resource that can be identified by a URI. In this case, either 200 + (OK) or 204 (No Content) is the appropriate response status, + depending on whether or not the response includes an entity that + describes the result. + + + +Fielding, et al. 
Standards Track [Page 54] + +RFC 2616 HTTP/1.1 June 1999 + + + If a resource has been created on the origin server, the response + SHOULD be 201 (Created) and contain an entity which describes the + status of the request and refers to the new resource, and a Location + header (see section 14.30). + + Responses to this method are not cacheable, unless the response + includes appropriate Cache-Control or Expires header fields. However, + the 303 (See Other) response can be used to direct the user agent to + retrieve a cacheable resource. + + POST requests MUST obey the message transmission requirements set out + in section 8.2. + + See section 15.1.3 for security considerations. + +9.6 PUT + + The PUT method requests that the enclosed entity be stored under the + supplied Request-URI. If the Request-URI refers to an already + existing resource, the enclosed entity SHOULD be considered as a + modified version of the one residing on the origin server. If the + Request-URI does not point to an existing resource, and that URI is + capable of being defined as a new resource by the requesting user + agent, the origin server can create the resource with that URI. If a + new resource is created, the origin server MUST inform the user agent + via the 201 (Created) response. If an existing resource is modified, + either the 200 (OK) or 204 (No Content) response codes SHOULD be sent + to indicate successful completion of the request. If the resource + could not be created or modified with the Request-URI, an appropriate + error response SHOULD be given that reflects the nature of the + problem. The recipient of the entity MUST NOT ignore any Content-* + (e.g. Content-Range) headers that it does not understand or implement + and MUST return a 501 (Not Implemented) response in such cases. + + If the request passes through a cache and the Request-URI identifies + one or more currently cached entities, those entries SHOULD be + treated as stale. 
Responses to this method are not cacheable. + + The fundamental difference between the POST and PUT requests is + reflected in the different meaning of the Request-URI. The URI in a + POST request identifies the resource that will handle the enclosed + entity. That resource might be a data-accepting process, a gateway to + some other protocol, or a separate entity that accepts annotations. + In contrast, the URI in a PUT request identifies the entity enclosed + with the request -- the user agent knows what URI is intended and the + server MUST NOT attempt to apply the request to some other resource. + If the server desires that the request be applied to a different URI, + + + + +Fielding, et al. Standards Track [Page 55] + +RFC 2616 HTTP/1.1 June 1999 + + + it MUST send a 301 (Moved Permanently) response; the user agent MAY + then make its own decision regarding whether or not to redirect the + request. + + A single resource MAY be identified by many different URIs. For + example, an article might have a URI for identifying "the current + version" which is separate from the URI identifying each particular + version. In this case, a PUT request on a general URI might result in + several other URIs being defined by the origin server. + + HTTP/1.1 does not define how a PUT method affects the state of an + origin server. + + PUT requests MUST obey the message transmission requirements set out + in section 8.2. + + Unless otherwise specified for a particular entity-header, the + entity-headers in the PUT request SHOULD be applied to the resource + created or modified by the PUT. + +9.7 DELETE + + The DELETE method requests that the origin server delete the resource + identified by the Request-URI. This method MAY be overridden by human + intervention (or other means) on the origin server. 
The client cannot + be guaranteed that the operation has been carried out, even if the + status code returned from the origin server indicates that the action + has been completed successfully. However, the server SHOULD NOT + indicate success unless, at the time the response is given, it + intends to delete the resource or move it to an inaccessible + location. + + A successful response SHOULD be 200 (OK) if the response includes an + entity describing the status, 202 (Accepted) if the action has not + yet been enacted, or 204 (No Content) if the action has been enacted + but the response does not include an entity. + + If the request passes through a cache and the Request-URI identifies + one or more currently cached entities, those entries SHOULD be + treated as stale. Responses to this method are not cacheable. + +9.8 TRACE + + The TRACE method is used to invoke a remote, application-layer loop- + back of the request message. The final recipient of the request + SHOULD reflect the message received back to the client as the + entity-body of a 200 (OK) response. The final recipient is either the + + + + +Fielding, et al. Standards Track [Page 56] + +RFC 2616 HTTP/1.1 June 1999 + + + origin server or the first proxy or gateway to receive a Max-Forwards + value of zero (0) in the request (see section 14.31). A TRACE request + MUST NOT include an entity. + + TRACE allows the client to see what is being received at the other + end of the request chain and use that data for testing or diagnostic + information. The value of the Via header field (section 14.45) is of + particular interest, since it acts as a trace of the request chain. + Use of the Max-Forwards header field allows the client to limit the + length of the request chain, which is useful for testing a chain of + proxies forwarding messages in an infinite loop. + + If the request is valid, the response SHOULD contain the entire + request message in the entity-body, with a Content-Type of + "message/http". 
Responses to this method MUST NOT be cached. + +9.9 CONNECT + + This specification reserves the method name CONNECT for use with a + proxy that can dynamically switch to being a tunnel (e.g. SSL + tunneling [44]). + +10 Status Code Definitions + + Each Status-Code is described below, including a description of which + method(s) it can follow and any metainformation required in the + response. + +10.1 Informational 1xx + + This class of status code indicates a provisional response, + consisting only of the Status-Line and optional headers, and is + terminated by an empty line. There are no required headers for this + class of status code. Since HTTP/1.0 did not define any 1xx status + codes, servers MUST NOT send a 1xx response to an HTTP/1.0 client + except under experimental conditions. + + A client MUST be prepared to accept one or more 1xx status responses + prior to a regular response, even if the client does not expect a 100 + (Continue) status message. Unexpected 1xx status responses MAY be + ignored by a user agent. + + Proxies MUST forward 1xx responses, unless the connection between the + proxy and its client has been closed, or unless the proxy itself + requested the generation of the 1xx response. (For example, if a + + + + + + +Fielding, et al. Standards Track [Page 57] + +RFC 2616 HTTP/1.1 June 1999 + + + proxy adds an "Expect: 100-continue" field when it forwards a request, + then it need not forward the corresponding 100 (Continue) + response(s).) + +10.1.1 100 Continue + + The client SHOULD continue with its request. This interim response is + used to inform the client that the initial part of the request has + been received and has not yet been rejected by the server. The client + SHOULD continue by sending the remainder of the request or, if the + request has already been completed, ignore this response. The server + MUST send a final response after the request has been completed.
See + section 8.2.3 for detailed discussion of the use and handling of this + status code. + +10.1.2 101 Switching Protocols + + The server understands and is willing to comply with the client's + request, via the Upgrade message header field (section 14.42), for a + change in the application protocol being used on this connection. The + server will switch protocols to those defined by the response's + Upgrade header field immediately after the empty line which + terminates the 101 response. + + The protocol SHOULD be switched only when it is advantageous to do + so. For example, switching to a newer version of HTTP is advantageous + over older versions, and switching to a real-time, synchronous + protocol might be advantageous when delivering resources that use + such features. + +10.2 Successful 2xx + + This class of status code indicates that the client's request was + successfully received, understood, and accepted. + +10.2.1 200 OK + + The request has succeeded. The information returned with the response + is dependent on the method used in the request, for example: + + GET an entity corresponding to the requested resource is sent in + the response; + + HEAD the entity-header fields corresponding to the requested + resource are sent in the response without any message-body; + + POST an entity describing or containing the result of the action; + + + + +Fielding, et al. Standards Track [Page 58] + +RFC 2616 HTTP/1.1 June 1999 + + + TRACE an entity containing the request message as received by the + end server. + +10.2.2 201 Created + + The request has been fulfilled and resulted in a new resource being + created. The newly created resource can be referenced by the URI(s) + returned in the entity of the response, with the most specific URI + for the resource given by a Location header field. 
The response + SHOULD include an entity containing a list of resource + characteristics and location(s) from which the user or user agent can + choose the one most appropriate. The entity format is specified by + the media type given in the Content-Type header field. The origin + server MUST create the resource before returning the 201 status code. + If the action cannot be carried out immediately, the server SHOULD + respond with a 202 (Accepted) response instead. + + A 201 response MAY contain an ETag response header field indicating + the current value of the entity tag for the requested variant just + created, see section 14.19. + +10.2.3 202 Accepted + + The request has been accepted for processing, but the processing has + not been completed. The request might or might not eventually be + acted upon, as it might be disallowed when processing actually takes + place. There is no facility for re-sending a status code from an + asynchronous operation such as this. + + The 202 response is intentionally non-committal. Its purpose is to + allow a server to accept a request for some other process (perhaps a + batch-oriented process that is only run once per day) without + requiring that the user agent's connection to the server persist + until the process is completed. The entity returned with this + response SHOULD include an indication of the request's current status + and either a pointer to a status monitor or some estimate of when the + user can expect the request to be fulfilled. + +10.2.4 203 Non-Authoritative Information + + The returned metainformation in the entity-header is not the + definitive set as available from the origin server, but is gathered + from a local or a third-party copy. The set presented MAY be a subset + or superset of the original version. For example, including local + annotation information about the resource might result in a superset + of the metainformation known by the origin server.
Use of this + response code is not required and is only appropriate when the + response would otherwise be 200 (OK). + + + +Fielding, et al. Standards Track [Page 59] + +RFC 2616 HTTP/1.1 June 1999 + + +10.2.5 204 No Content + + The server has fulfilled the request but does not need to return an + entity-body, and might want to return updated metainformation. The + response MAY include new or updated metainformation in the form of + entity-headers, which if present SHOULD be associated with the + requested variant. + + If the client is a user agent, it SHOULD NOT change its document view + from that which caused the request to be sent. This response is + primarily intended to allow input for actions to take place without + causing a change to the user agent's active document view, although + any new or updated metainformation SHOULD be applied to the document + currently in the user agent's active view. + + The 204 response MUST NOT include a message-body, and thus is always + terminated by the first empty line after the header fields. + +10.2.6 205 Reset Content + + The server has fulfilled the request and the user agent SHOULD reset + the document view which caused the request to be sent. This response + is primarily intended to allow input for actions to take place via + user input, followed by a clearing of the form in which the input is + given so that the user can easily initiate another input action. The + response MUST NOT include an entity. + +10.2.7 206 Partial Content + + The server has fulfilled the partial GET request for the resource. + The request MUST have included a Range header field (section 14.35) + indicating the desired range, and MAY have included an If-Range + header field (section 14.27) to make the request conditional. 
+ + The response MUST include the following header fields: + + - Either a Content-Range header field (section 14.16) indicating + the range included with this response, or a multipart/byteranges + Content-Type including Content-Range fields for each part. If a + Content-Length header field is present in the response, its + value MUST match the actual number of OCTETs transmitted in the + message-body. + + - Date + + - ETag and/or Content-Location, if the header would have been sent + in a 200 response to the same request + + + + +Fielding, et al. Standards Track [Page 60] + +RFC 2616 HTTP/1.1 June 1999 + + + - Expires, Cache-Control, and/or Vary, if the field-value might + differ from that sent in any previous response for the same + variant + + If the 206 response is the result of an If-Range request that used a + strong cache validator (see section 13.3.3), the response SHOULD NOT + include other entity-headers. If the response is the result of an + If-Range request that used a weak validator, the response MUST NOT + include other entity-headers; this prevents inconsistencies between + cached entity-bodies and updated headers. Otherwise, the response + MUST include all of the entity-headers that would have been returned + with a 200 (OK) response to the same request. + + A cache MUST NOT combine a 206 response with other previously cached + content if the ETag or Last-Modified headers do not match exactly, + see 13.5.4. + + A cache that does not support the Range and Content-Range headers + MUST NOT cache 206 (Partial) responses. + +10.3 Redirection 3xx + + This class of status code indicates that further action needs to be + taken by the user agent in order to fulfill the request. The action + required MAY be carried out by the user agent without interaction + with the user if and only if the method used in the second request is + GET or HEAD. A client SHOULD detect infinite redirection loops, since + such loops generate network traffic for each redirection. 
+ + Note: previous versions of this specification recommended a + maximum of five redirections. Content developers should be aware + that there might be clients that implement such a fixed + limitation. + +10.3.1 300 Multiple Choices + + The requested resource corresponds to any one of a set of + representations, each with its own specific location, and agent- + driven negotiation information (section 12) is being provided so that + the user (or user agent) can select a preferred representation and + redirect its request to that location. + + Unless it was a HEAD request, the response SHOULD include an entity + containing a list of resource characteristics and location(s) from + which the user or user agent can choose the one most appropriate. The + entity format is specified by the media type given in the Content- + Type header field. Depending upon the format and the capabilities of + + + + +Fielding, et al. Standards Track [Page 61] + +RFC 2616 HTTP/1.1 June 1999 + + + the user agent, selection of the most appropriate choice MAY be + performed automatically. However, this specification does not define + any standard for such automatic selection. + + If the server has a preferred choice of representation, it SHOULD + include the specific URI for that representation in the Location + field; user agents MAY use the Location field value for automatic + redirection. This response is cacheable unless indicated otherwise. + +10.3.2 301 Moved Permanently + + The requested resource has been assigned a new permanent URI and any + future references to this resource SHOULD use one of the returned + URIs. Clients with link editing capabilities ought to automatically + re-link references to the Request-URI to one or more of the new + references returned by the server, where possible. This response is + cacheable unless indicated otherwise. + + The new permanent URI SHOULD be given by the Location field in the + response. 
Unless the request method was HEAD, the entity of the + response SHOULD contain a short hypertext note with a hyperlink to + the new URI(s). + + If the 301 status code is received in response to a request other + than GET or HEAD, the user agent MUST NOT automatically redirect the + request unless it can be confirmed by the user, since this might + change the conditions under which the request was issued. + + Note: When automatically redirecting a POST request after + receiving a 301 status code, some existing HTTP/1.0 user agents + will erroneously change it into a GET request. + +10.3.3 302 Found + + The requested resource resides temporarily under a different URI. + Since the redirection might be altered on occasion, the client SHOULD + continue to use the Request-URI for future requests. This response + is only cacheable if indicated by a Cache-Control or Expires header + field. + + The temporary URI SHOULD be given by the Location field in the + response. Unless the request method was HEAD, the entity of the + response SHOULD contain a short hypertext note with a hyperlink to + the new URI(s). + + + + + + + +Fielding, et al. Standards Track [Page 62] + +RFC 2616 HTTP/1.1 June 1999 + + + If the 302 status code is received in response to a request other + than GET or HEAD, the user agent MUST NOT automatically redirect the + request unless it can be confirmed by the user, since this might + change the conditions under which the request was issued. + + Note: RFC 1945 and RFC 2068 specify that the client is not allowed + to change the method on the redirected request. However, most + existing user agent implementations treat 302 as if it were a 303 + response, performing a GET on the Location field-value regardless + of the original request method. The status codes 303 and 307 have + been added for servers that wish to make unambiguously clear which + kind of reaction is expected of the client. 
+ +10.3.4 303 See Other + + The response to the request can be found under a different URI and + SHOULD be retrieved using a GET method on that resource. This method + exists primarily to allow the output of a POST-activated script to + redirect the user agent to a selected resource. The new URI is not a + substitute reference for the originally requested resource. The 303 + response MUST NOT be cached, but the response to the second + (redirected) request might be cacheable. + + The different URI SHOULD be given by the Location field in the + response. Unless the request method was HEAD, the entity of the + response SHOULD contain a short hypertext note with a hyperlink to + the new URI(s). + + Note: Many pre-HTTP/1.1 user agents do not understand the 303 + status. When interoperability with such clients is a concern, the + 302 status code may be used instead, since most user agents react + to a 302 response as described here for 303. + +10.3.5 304 Not Modified + + If the client has performed a conditional GET request and access is + allowed, but the document has not been modified, the server SHOULD + respond with this status code. The 304 response MUST NOT contain a + message-body, and thus is always terminated by the first empty line + after the header fields. + + The response MUST include the following header fields: + + - Date, unless its omission is required by section 14.18.1 + + + + + + + +Fielding, et al. Standards Track [Page 63] + +RFC 2616 HTTP/1.1 June 1999 + + + If a clockless origin server obeys these rules, and proxies and + clients add their own Date to any response received without one (as + already specified by [RFC 2068], section 14.19), caches will operate + correctly. 
+ + - ETag and/or Content-Location, if the header would have been sent + in a 200 response to the same request + + - Expires, Cache-Control, and/or Vary, if the field-value might + differ from that sent in any previous response for the same + variant + + If the conditional GET used a strong cache validator (see section + 13.3.3), the response SHOULD NOT include other entity-headers. + Otherwise (i.e., the conditional GET used a weak validator), the + response MUST NOT include other entity-headers; this prevents + inconsistencies between cached entity-bodies and updated headers. + + If a 304 response indicates an entity not currently cached, then the + cache MUST disregard the response and repeat the request without the + conditional. + + If a cache uses a received 304 response to update a cache entry, the + cache MUST update the entry to reflect any new field values given in + the response. + +10.3.6 305 Use Proxy + + The requested resource MUST be accessed through the proxy given by + the Location field. The Location field gives the URI of the proxy. + The recipient is expected to repeat this single request via the + proxy. 305 responses MUST only be generated by origin servers. + + Note: RFC 2068 was not clear that 305 was intended to redirect a + single request, and to be generated by origin servers only. Not + observing these limitations has significant security consequences. + +10.3.7 306 (Unused) + + The 306 status code was used in a previous version of the + specification, is no longer used, and the code is reserved. + + + + + + + + + + +Fielding, et al. Standards Track [Page 64] + +RFC 2616 HTTP/1.1 June 1999 + + +10.3.8 307 Temporary Redirect + + The requested resource resides temporarily under a different URI. + Since the redirection MAY be altered on occasion, the client SHOULD + continue to use the Request-URI for future requests. This response + is only cacheable if indicated by a Cache-Control or Expires header + field. 
+ + The temporary URI SHOULD be given by the Location field in the + response. Unless the request method was HEAD, the entity of the + response SHOULD contain a short hypertext note with a hyperlink to + the new URI(s), since many pre-HTTP/1.1 user agents do not + understand the 307 status. Therefore, the note SHOULD contain the + information necessary for a user to repeat the original request on + the new URI. + + If the 307 status code is received in response to a request other + than GET or HEAD, the user agent MUST NOT automatically redirect the + request unless it can be confirmed by the user, since this might + change the conditions under which the request was issued. + +10.4 Client Error 4xx + + The 4xx class of status code is intended for cases in which the + client seems to have erred. Except when responding to a HEAD request, + the server SHOULD include an entity containing an explanation of the + error situation, and whether it is a temporary or permanent + condition. These status codes are applicable to any request method. + User agents SHOULD display any included entity to the user. + + If the client is sending data, a server implementation using TCP + SHOULD be careful to ensure that the client acknowledges receipt of + the packet(s) containing the response, before the server closes the + input connection. If the client continues sending data to the server + after the close, the server's TCP stack will send a reset packet to + the client, which may erase the client's unacknowledged input buffers + before they can be read and interpreted by the HTTP application. + +10.4.1 400 Bad Request + + The request could not be understood by the server due to malformed + syntax. The client SHOULD NOT repeat the request without + modifications. + + + + + + + + +Fielding, et al. Standards Track [Page 65] + +RFC 2616 HTTP/1.1 June 1999 + + +10.4.2 401 Unauthorized + + The request requires user authentication.
The response MUST include a + WWW-Authenticate header field (section 14.47) containing a challenge + applicable to the requested resource. The client MAY repeat the + request with a suitable Authorization header field (section 14.8). If + the request already included Authorization credentials, then the 401 + response indicates that authorization has been refused for those + credentials. If the 401 response contains the same challenge as the + prior response, and the user agent has already attempted + authentication at least once, then the user SHOULD be presented the + entity that was given in the response, since that entity might + include relevant diagnostic information. HTTP access authentication + is explained in "HTTP Authentication: Basic and Digest Access + Authentication" [43]. + +10.4.3 402 Payment Required + + This code is reserved for future use. + +10.4.4 403 Forbidden + + The server understood the request, but is refusing to fulfill it. + Authorization will not help and the request SHOULD NOT be repeated. + If the request method was not HEAD and the server wishes to make + public why the request has not been fulfilled, it SHOULD describe the + reason for the refusal in the entity. If the server does not wish to + make this information available to the client, the status code 404 + (Not Found) can be used instead. + +10.4.5 404 Not Found + + The server has not found anything matching the Request-URI. No + indication is given of whether the condition is temporary or + permanent. The 410 (Gone) status code SHOULD be used if the server + knows, through some internally configurable mechanism, that an old + resource is permanently unavailable and has no forwarding address. + This status code is commonly used when the server does not wish to + reveal exactly why the request has been refused, or when no other + response is applicable. 
+ +10.4.6 405 Method Not Allowed + + The method specified in the Request-Line is not allowed for the + resource identified by the Request-URI. The response MUST include an + Allow header containing a list of valid methods for the requested + resource. + + + + +Fielding, et al. Standards Track [Page 66] + +RFC 2616 HTTP/1.1 June 1999 + + +10.4.7 406 Not Acceptable + + The resource identified by the request is only capable of generating + response entities which have content characteristics not acceptable + according to the accept headers sent in the request. + + Unless it was a HEAD request, the response SHOULD include an entity + containing a list of available entity characteristics and location(s) + from which the user or user agent can choose the one most + appropriate. The entity format is specified by the media type given + in the Content-Type header field. Depending upon the format and the + capabilities of the user agent, selection of the most appropriate + choice MAY be performed automatically. However, this specification + does not define any standard for such automatic selection. + + Note: HTTP/1.1 servers are allowed to return responses which are + not acceptable according to the accept headers sent in the + request. In some cases, this may even be preferable to sending a + 406 response. User agents are encouraged to inspect the headers of + an incoming response to determine if it is acceptable. + + If the response could be unacceptable, a user agent SHOULD + temporarily stop receipt of more data and query the user for a + decision on further actions. + +10.4.8 407 Proxy Authentication Required + + This code is similar to 401 (Unauthorized), but indicates that the + client must first authenticate itself with the proxy. The proxy MUST + return a Proxy-Authenticate header field (section 14.33) containing a + challenge applicable to the proxy for the requested resource. 
The + client MAY repeat the request with a suitable Proxy-Authorization + header field (section 14.34). HTTP access authentication is explained + in "HTTP Authentication: Basic and Digest Access Authentication" + [43]. + +10.4.9 408 Request Timeout + + The client did not produce a request within the time that the server + was prepared to wait. The client MAY repeat the request without + modifications at any later time. + +10.4.10 409 Conflict + + The request could not be completed due to a conflict with the current + state of the resource. This code is only allowed in situations where + it is expected that the user might be able to resolve the conflict + and resubmit the request. The response body SHOULD include enough + + + +Fielding, et al. Standards Track [Page 67] + +RFC 2616 HTTP/1.1 June 1999 + + + information for the user to recognize the source of the conflict. + Ideally, the response entity would include enough information for the + user or user agent to fix the problem; however, that might not be + possible and is not required. + + Conflicts are most likely to occur in response to a PUT request. For + example, if versioning were being used and the entity being PUT + included changes to a resource which conflict with those made by an + earlier (third-party) request, the server might use the 409 response + to indicate that it can't complete the request. In this case, the + response entity would likely contain a list of the differences + between the two versions in a format defined by the response + Content-Type. + +10.4.11 410 Gone + + The requested resource is no longer available at the server and no + forwarding address is known. This condition is expected to be + considered permanent. Clients with link editing capabilities SHOULD + delete references to the Request-URI after user approval. If the + server does not know, or has no facility to determine, whether or not + the condition is permanent, the status code 404 (Not Found) SHOULD be + used instead. 
This response is cacheable unless indicated otherwise. + + The 410 response is primarily intended to assist the task of web + maintenance by notifying the recipient that the resource is + intentionally unavailable and that the server owners desire that + remote links to that resource be removed. Such an event is common for + limited-time, promotional services and for resources belonging to + individuals no longer working at the server's site. It is not + necessary to mark all permanently unavailable resources as "gone" or + to keep the mark for any length of time -- that is left to the + discretion of the server owner. + +10.4.12 411 Length Required + + The server refuses to accept the request without a defined Content- + Length. The client MAY repeat the request if it adds a valid + Content-Length header field containing the length of the message-body + in the request message. + +10.4.13 412 Precondition Failed + + The precondition given in one or more of the request-header fields + evaluated to false when it was tested on the server. This response + code allows the client to place preconditions on the current resource + metainformation (header field data) and thus prevent the requested + method from being applied to a resource other than the one intended. + + + +Fielding, et al. Standards Track [Page 68] + +RFC 2616 HTTP/1.1 June 1999 + + +10.4.14 413 Request Entity Too Large + + The server is refusing to process a request because the request + entity is larger than the server is willing or able to process. The + server MAY close the connection to prevent the client from continuing + the request. + + If the condition is temporary, the server SHOULD include a Retry- + After header field to indicate that it is temporary and after what + time the client MAY try again. + +10.4.15 414 Request-URI Too Long + + The server is refusing to service the request because the Request-URI + is longer than the server is willing to interpret. 
This rare + condition is only likely to occur when a client has improperly + converted a POST request to a GET request with long query + information, when the client has descended into a URI "black hole" of + redirection (e.g., a redirected URI prefix that points to a suffix of + itself), or when the server is under attack by a client attempting to + exploit security holes present in some servers using fixed-length + buffers for reading or manipulating the Request-URI. + +10.4.16 415 Unsupported Media Type + + The server is refusing to service the request because the entity of + the request is in a format not supported by the requested resource + for the requested method. + +10.4.17 416 Requested Range Not Satisfiable + + A server SHOULD return a response with this status code if a request + included a Range request-header field (section 14.35), and none of + the range-specifier values in this field overlap the current extent + of the selected resource, and the request did not include an If-Range + request-header field. (For byte-ranges, this means that the first- + byte-pos of all of the byte-range-spec values were greater than the + current length of the selected resource.) + + When this status code is returned for a byte-range request, the + response SHOULD include a Content-Range entity-header field + specifying the current length of the selected resource (see section + 14.16). This response MUST NOT use the multipart/byteranges content- + type. + + + + + + + +Fielding, et al. Standards Track [Page 69] + +RFC 2616 HTTP/1.1 June 1999 + + +10.4.18 417 Expectation Failed + + The expectation given in an Expect request-header field (see section + 14.20) could not be met by this server, or, if the server is a proxy, + the server has unambiguous evidence that the request could not be met + by the next-hop server. 
+ +10.5 Server Error 5xx + + Response status codes beginning with the digit "5" indicate cases in + which the server is aware that it has erred or is incapable of + performing the request. Except when responding to a HEAD request, the + server SHOULD include an entity containing an explanation of the + error situation, and whether it is a temporary or permanent + condition. User agents SHOULD display any included entity to the + user. These response codes are applicable to any request method. + +10.5.1 500 Internal Server Error + + The server encountered an unexpected condition which prevented it + from fulfilling the request. + +10.5.2 501 Not Implemented + + The server does not support the functionality required to fulfill the + request. This is the appropriate response when the server does not + recognize the request method and is not capable of supporting it for + any resource. + +10.5.3 502 Bad Gateway + + The server, while acting as a gateway or proxy, received an invalid + response from the upstream server it accessed in attempting to + fulfill the request. + +10.5.4 503 Service Unavailable + + The server is currently unable to handle the request due to a + temporary overloading or maintenance of the server. The implication + is that this is a temporary condition which will be alleviated after + some delay. If known, the length of the delay MAY be indicated in a + Retry-After header. If no Retry-After is given, the client SHOULD + handle the response as it would for a 500 response. + + Note: The existence of the 503 status code does not imply that a + server must use it when becoming overloaded. Some servers may wish + to simply refuse the connection. + + + + +Fielding, et al. Standards Track [Page 70] + +RFC 2616 HTTP/1.1 June 1999 + + +10.5.5 504 Gateway Timeout + + The server, while acting as a gateway or proxy, did not receive a + timely response from the upstream server specified by the URI (e.g. + HTTP, FTP, LDAP) or some other auxiliary server (e.g. 
DNS) it needed + to access in attempting to complete the request. + + Note to implementors: some deployed proxies are known to + return 400 or 500 when DNS lookups time out. + +10.5.6 505 HTTP Version Not Supported + + The server does not support, or refuses to support, the HTTP protocol + version that was used in the request message. The server is + indicating that it is unable or unwilling to complete the request + using the same major version as the client, as described in section + 3.1, other than with this error message. The response SHOULD contain + an entity describing why that version is not supported and what other + protocols are supported by that server. + +11 Access Authentication + + HTTP provides several OPTIONAL challenge-response authentication + mechanisms which can be used by a server to challenge a client + request and by a client to provide authentication information. The + general framework for access authentication, and the specification of + "basic" and "digest" authentication, are specified in "HTTP + Authentication: Basic and Digest Access Authentication" [43]. This + specification adopts the definitions of "challenge" and "credentials" + from that specification. + +12 Content Negotiation + + Most HTTP responses include an entity which contains information for + interpretation by a human user. Naturally, it is desirable to supply + the user with the "best available" entity corresponding to the + request. Unfortunately for servers and caches, not all users have the + same preferences for what is "best," and not all user agents are + equally capable of rendering all entity types. For that reason, HTTP + has provisions for several mechanisms for "content negotiation" -- + the process of selecting the best representation for a given response + when there are multiple representations available.
+ + Note: This is not called "format negotiation" because the + alternate representations may be of the same media type, but use + different capabilities of that type, be in different languages, + etc. + + + + +Fielding, et al. Standards Track [Page 71] + +RFC 2616 HTTP/1.1 June 1999 + + + Any response containing an entity-body MAY be subject to negotiation, + including error responses. + + There are two kinds of content negotiation which are possible in + HTTP: server-driven and agent-driven negotiation. These two kinds of + negotiation are orthogonal and thus may be used separately or in + combination. One method of combination, referred to as transparent + negotiation, occurs when a cache uses the agent-driven negotiation + information provided by the origin server in order to provide + server-driven negotiation for subsequent requests. + +12.1 Server-driven Negotiation + + If the selection of the best representation for a response is made by + an algorithm located at the server, it is called server-driven + negotiation. Selection is based on the available representations of + the response (the dimensions over which it can vary; e.g. language, + content-coding, etc.) and the contents of particular header fields in + the request message or on other information pertaining to the request + (such as the network address of the client). + + Server-driven negotiation is advantageous when the algorithm for + selecting from among the available representations is difficult to + describe to the user agent, or when the server desires to send its + "best guess" to the client along with the first response (hoping to + avoid the round-trip delay of a subsequent request if the "best + guess" is good enough for the user). In order to improve the server's + guess, the user agent MAY include request header fields (Accept, + Accept-Language, Accept-Encoding, etc.) which describe its + preferences for such a response. + + Server-driven negotiation has disadvantages: + + 1. 
It is impossible for the server to accurately determine what + might be "best" for any given user, since that would require + complete knowledge of both the capabilities of the user agent + and the intended use for the response (e.g., does the user want + to view it on screen or print it on paper?). + + 2. Having the user agent describe its capabilities in every + request can be both very inefficient (given that only a small + percentage of responses have multiple representations) and a + potential violation of the user's privacy. + + 3. It complicates the implementation of an origin server and the + algorithms for generating responses to a request. + + + + + +Fielding, et al. Standards Track [Page 72] + +RFC 2616 HTTP/1.1 June 1999 + + + 4. It may limit a public cache's ability to use the same response + for multiple users' requests. + + HTTP/1.1 includes the following request-header fields for enabling + server-driven negotiation through description of user agent + capabilities and user preferences: Accept (section 14.1), Accept- + Charset (section 14.2), Accept-Encoding (section 14.3), Accept- + Language (section 14.4), and User-Agent (section 14.43). However, an + origin server is not limited to these dimensions and MAY vary the + response based on any aspect of the request, including information + outside the request-header fields or within extension header fields + not defined by this specification. + + The Vary header field can be used to express the parameters the + server uses to select a representation that is subject to server- + driven negotiation. See section 13.6 for use of the Vary header field + by caches and section 14.44 for use of the Vary header field by + servers. + +12.2 Agent-driven Negotiation + + With agent-driven negotiation, selection of the best representation + for a response is performed by the user agent after receiving an + initial response from the origin server.
Selection is based on a list + of the available representations of the response included within the + header fields or entity-body of the initial response, with each + representation identified by its own URI. Selection from among the + representations may be performed automatically (if the user agent is + capable of doing so) or manually by the user selecting from a + generated (possibly hypertext) menu. + + Agent-driven negotiation is advantageous when the response would vary + over commonly-used dimensions (such as type, language, or encoding), + when the origin server is unable to determine a user agent's + capabilities from examining the request, and generally when public + caches are used to distribute server load and reduce network usage. + + Agent-driven negotiation suffers from the disadvantage of needing a + second request to obtain the best alternate representation. This + second request is only efficient when caching is used. In addition, + this specification does not define any mechanism for supporting + automatic selection, though it also does not prevent any such + mechanism from being developed as an extension and used within + HTTP/1.1. + + + + + + + +Fielding, et al. Standards Track [Page 73] + +RFC 2616 HTTP/1.1 June 1999 + + + HTTP/1.1 defines the 300 (Multiple Choices) and 406 (Not Acceptable) + status codes for enabling agent-driven negotiation when the server is + unwilling or unable to provide a varying response using server-driven + negotiation. + +12.3 Transparent Negotiation + + Transparent negotiation is a combination of both server-driven and + agent-driven negotiation. When a cache is supplied with a form of the + list of available representations of the response (as in agent-driven + negotiation) and the dimensions of variance are completely understood + by the cache, then the cache becomes capable of performing server- + driven negotiation on behalf of the origin server for subsequent + requests on that resource. 
+ + Transparent negotiation has the advantage of distributing the + negotiation work that would otherwise be required of the origin + server and also removing the second request delay of agent-driven + negotiation when the cache is able to correctly guess the right + response. + + This specification does not define any mechanism for transparent + negotiation, though it also does not prevent any such mechanism from + being developed as an extension that could be used within HTTP/1.1. + +13 Caching in HTTP + + HTTP is typically used for distributed information systems, where + performance can be improved by the use of response caches. The + HTTP/1.1 protocol includes a number of elements intended to make + caching work as well as possible. Because these elements are + inextricable from other aspects of the protocol, and because they + interact with each other, it is useful to describe the basic caching + design of HTTP separately from the detailed descriptions of methods, + headers, response codes, etc. + + Caching would be useless if it did not significantly improve + performance. The goal of caching in HTTP/1.1 is to eliminate the need + to send requests in many cases, and to eliminate the need to send + full responses in many other cases. The former reduces the number of + network round-trips required for many operations; we use an + "expiration" mechanism for this purpose (see section 13.2). The + latter reduces network bandwidth requirements; we use a "validation" + mechanism for this purpose (see section 13.3). + + Requirements for performance, availability, and disconnected + operation require us to be able to relax the goal of semantic + transparency. The HTTP/1.1 protocol allows origin servers, caches, + + + +Fielding, et al. Standards Track [Page 74] + +RFC 2616 HTTP/1.1 June 1999 + + + and clients to explicitly reduce transparency when necessary. 
+ However, because non-transparent operation may confuse non-expert + users, and might be incompatible with certain server applications + (such as those for ordering merchandise), the protocol requires that + transparency be relaxed + + - only by an explicit protocol-level request when relaxed by + client or origin server + + - only with an explicit warning to the end user when relaxed by + cache or client + + Therefore, the HTTP/1.1 protocol provides these important elements: + + 1. Protocol features that provide full semantic transparency when + this is required by all parties. + + 2. Protocol features that allow an origin server or user agent to + explicitly request and control non-transparent operation. + + 3. Protocol features that allow a cache to attach warnings to + responses that do not preserve the requested approximation of + semantic transparency. + + A basic principle is that it must be possible for the clients to + detect any potential relaxation of semantic transparency. + + Note: The server, cache, or client implementor might be faced with + design decisions not explicitly discussed in this specification. + If a decision might affect semantic transparency, the implementor + ought to err on the side of maintaining transparency unless a + careful and complete analysis shows significant benefits in + breaking transparency. + +13.1.1 Cache Correctness + + A correct cache MUST respond to a request with the most up-to-date + response held by the cache that is appropriate to the request (see + sections 13.2.5, 13.2.6, and 13.12) which meets one of the following + conditions: + + 1. It has been checked for equivalence with what the origin server + would have returned by revalidating the response with the + origin server (section 13.3); + + + + + + + +Fielding, et al. Standards Track [Page 75] + +RFC 2616 HTTP/1.1 June 1999 + + + 2. It is "fresh enough" (see section 13.2). 
In the default case, + this means it meets the least restrictive freshness requirement + of the client, origin server, and cache (see section 14.9); if + the origin server so specifies, it is the freshness requirement + of the origin server alone. + + If a stored response is not "fresh enough" by the most + restrictive freshness requirement of both the client and the + origin server, in carefully considered circumstances the cache + MAY still return the response with the appropriate Warning + header (see section 13.1.5 and 14.46), unless such a response + is prohibited (e.g., by a "no-store" cache-directive, or by a + "no-cache" cache-request-directive; see section 14.9). + + 3. It is an appropriate 304 (Not Modified), 305 (Proxy Redirect), + or error (4xx or 5xx) response message. + + If the cache can not communicate with the origin server, then a + correct cache SHOULD respond as above if the response can be + correctly served from the cache; if not it MUST return an error or + warning indicating that there was a communication failure. + + If a cache receives a response (either an entire response, or a 304 + (Not Modified) response) that it would normally forward to the + requesting client, and the received response is no longer fresh, the + cache SHOULD forward it to the requesting client without adding a new + Warning (but without removing any existing Warning headers). A cache + SHOULD NOT attempt to revalidate a response simply because that + response became stale in transit; this might lead to an infinite + loop. A user agent that receives a stale response without a Warning + MAY display a warning indication to the user. + +13.1.2 Warnings + + Whenever a cache returns a response that is neither first-hand nor + "fresh enough" (in the sense of condition 2 in section 13.1.1), it + MUST attach a warning to that effect, using a Warning general-header. + The Warning header and the currently defined warnings are described + in section 14.46. 
The warning allows clients to take appropriate + action. + + Warnings MAY be used for other purposes, both cache-related and + otherwise. The use of a warning, rather than an error status code, + distinguishes these responses from true failures. + + Warnings are assigned three digit warn-codes. The first digit + indicates whether the Warning MUST or MUST NOT be deleted from a + stored cache entry after a successful revalidation: + + + +Fielding, et al. Standards Track [Page 76] + +RFC 2616 HTTP/1.1 June 1999 + + + 1xx Warnings that describe the freshness or revalidation status of + the response, and so MUST be deleted after a successful + revalidation. 1XX warn-codes MAY be generated by a cache only when + validating a cached entry. It MUST NOT be generated by clients. + + 2xx Warnings that describe some aspect of the entity body or entity + headers that is not rectified by a revalidation (for example, a + lossy compression of the entity bodies) and which MUST NOT be + deleted after a successful revalidation. + + See section 14.46 for the definitions of the codes themselves. + + HTTP/1.0 caches will cache all Warnings in responses, without + deleting the ones in the first category. Warnings in responses that + are passed to HTTP/1.0 caches carry an extra warning-date field, + which prevents a future HTTP/1.1 recipient from believing an + erroneously cached Warning. + + Warnings also carry a warning text. The text MAY be in any + appropriate natural language (perhaps based on the client's Accept + headers), and include an OPTIONAL indication of what character set is + used. + + Multiple warnings MAY be attached to a response (either by the origin + server or by a cache), including multiple warnings with the same code + number. For example, a server might provide the same warning with + texts in both English and Basque. + + When multiple warnings are attached to a response, it might not be + practical or reasonable to display all of them to the user.
This + version of HTTP does not specify strict priority rules for deciding + which warnings to display and in what order, but does suggest some + heuristics. + +13.1.3 Cache-control Mechanisms + + The basic cache mechanisms in HTTP/1.1 (server-specified expiration + times and validators) are implicit directives to caches. In some + cases, a server or client might need to provide explicit directives + to the HTTP caches. We use the Cache-Control header for this purpose. + + The Cache-Control header allows a client or server to transmit a + variety of directives in either requests or responses. These + directives typically override the default caching algorithms. As a + general rule, if there is any apparent conflict between header + values, the most restrictive interpretation is applied (that is, the + one that is most likely to preserve semantic transparency). However, + + + + +Fielding, et al. Standards Track [Page 77] + +RFC 2616 HTTP/1.1 June 1999 + + + in some cases, cache-control directives are explicitly specified as + weakening the approximation of semantic transparency (for example, + "max-stale" or "public"). + + The cache-control directives are described in detail in section 14.9. + +13.1.4 Explicit User Agent Warnings + + Many user agents make it possible for users to override the basic + caching mechanisms. For example, the user agent might allow the user + to specify that cached entities (even explicitly stale ones) are + never validated. Or the user agent might habitually add "Cache- + Control: max-stale=3600" to every request. The user agent SHOULD NOT + default to either non-transparent behavior, or behavior that results + in abnormally ineffective caching, but MAY be explicitly configured + to do so by an explicit action of the user. 
+ + If the user has overridden the basic caching mechanisms, the user + agent SHOULD explicitly indicate to the user whenever this results in + the display of information that might not meet the server's + transparency requirements (in particular, if the displayed entity is + known to be stale). Since the protocol normally allows the user agent + to determine if responses are stale or not, this indication need only + be displayed when this actually happens. The indication need not be a + dialog box; it could be an icon (for example, a picture of a rotting + fish) or some other indicator. + + If the user has overridden the caching mechanisms in a way that would + abnormally reduce the effectiveness of caches, the user agent SHOULD + continually indicate this state to the user (for example, by a + display of a picture of currency in flames) so that the user does not + inadvertently consume excess resources or suffer from excessive + latency. + +13.1.5 Exceptions to the Rules and Warnings + + In some cases, the operator of a cache MAY choose to configure it to + return stale responses even when not requested by clients. This + decision ought not be made lightly, but may be necessary for reasons + of availability or performance, especially when the cache is poorly + connected to the origin server. Whenever a cache returns a stale + response, it MUST mark it as such (using a Warning header) enabling + the client software to alert the user that there might be a potential + problem. + + + + + + + +Fielding, et al. Standards Track [Page 78] + +RFC 2616 HTTP/1.1 June 1999 + + + It also allows the user agent to take steps to obtain a first-hand or + fresh response. For this reason, a cache SHOULD NOT return a stale + response if the client explicitly requests a first-hand or fresh one, + unless it is impossible to comply for technical or policy reasons. 
+ +13.1.6 Client-controlled Behavior + + While the origin server (and to a lesser extent, intermediate caches, + by their contribution to the age of a response) are the primary + source of expiration information, in some cases the client might need + to control a cache's decision about whether to return a cached + response without validating it. Clients do this using several + directives of the Cache-Control header. + + A client's request MAY specify the maximum age it is willing to + accept of an unvalidated response; specifying a value of zero forces + the cache(s) to revalidate all responses. A client MAY also specify + the minimum time remaining before a response expires. Both of these + options increase constraints on the behavior of caches, and so cannot + further relax the cache's approximation of semantic transparency. + + A client MAY also specify that it will accept stale responses, up to + some maximum amount of staleness. This loosens the constraints on the + caches, and so might violate the origin server's specified + constraints on semantic transparency, but might be necessary to + support disconnected operation, or high availability in the face of + poor connectivity. + +13.2 Expiration Model + +13.2.1 Server-Specified Expiration + + HTTP caching works best when caches can entirely avoid making + requests to the origin server. The primary mechanism for avoiding + requests is for an origin server to provide an explicit expiration + time in the future, indicating that a response MAY be used to satisfy + subsequent requests. In other words, a cache can return a fresh + response without first contacting the server. + + Our expectation is that servers will assign future explicit + expiration times to responses in the belief that the entity is not + likely to change, in a semantically significant way, before the + expiration time is reached. This normally preserves semantic + transparency, as long as the server's expiration times are carefully + chosen. 
+ + + + + +Fielding, et al. Standards Track [Page 79] + +RFC 2616 HTTP/1.1 June 1999 + + + The expiration mechanism applies only to responses taken from a cache + and not to first-hand responses forwarded immediately to the + requesting client. + + If an origin server wishes to force a semantically transparent cache + to validate every request, it MAY assign an explicit expiration time + in the past. This means that the response is always stale, and so the + cache SHOULD validate it before using it for subsequent requests. See + section 14.9.4 for a more restrictive way to force revalidation. + + If an origin server wishes to force any HTTP/1.1 cache, no matter how + it is configured, to validate every request, it SHOULD use the "must- + revalidate" cache-control directive (see section 14.9). + + Servers specify explicit expiration times using either the Expires + header, or the max-age directive of the Cache-Control header. + + An expiration time cannot be used to force a user agent to refresh + its display or reload a resource; its semantics apply only to caching + mechanisms, and such mechanisms need only check a resource's + expiration status when a new request for that resource is initiated. + See section 13.13 for an explanation of the difference between caches + and history mechanisms. + +13.2.2 Heuristic Expiration + + Since origin servers do not always provide explicit expiration times, + HTTP caches typically assign heuristic expiration times, employing + algorithms that use other header values (such as the Last-Modified + time) to estimate a plausible expiration time. The HTTP/1.1 + specification does not provide specific algorithms, but does impose + worst-case constraints on their results. Since heuristic expiration + times might compromise semantic transparency, they ought to be used + cautiously, and we encourage origin servers to provide explicit + expiration times as much as possible.
+ +13.2.3 Age Calculations + + In order to know if a cached entry is fresh, a cache needs to know if + its age exceeds its freshness lifetime. We discuss how to calculate + the latter in section 13.2.4; this section describes how to calculate + the age of a response or cache entry. + + In this discussion, we use the term "now" to mean "the current value + of the clock at the host performing the calculation." Hosts that use + HTTP, but especially hosts running origin servers and caches, SHOULD + use NTP [28] or some similar protocol to synchronize their clocks to + a globally accurate time standard. + + + +Fielding, et al. Standards Track [Page 80] + +RFC 2616 HTTP/1.1 June 1999 + + + HTTP/1.1 requires origin servers to send a Date header, if possible, + with every response, giving the time at which the response was + generated (see section 14.18). We use the term "date_value" to denote + the value of the Date header, in a form appropriate for arithmetic + operations. + + HTTP/1.1 uses the Age response-header to convey the estimated age of + the response message when obtained from a cache. The Age field value + is the cache's estimate of the amount of time since the response was + generated or revalidated by the origin server. + + In essence, the Age value is the sum of the time that the response + has been resident in each of the caches along the path from the + origin server, plus the amount of time it has been in transit along + network paths. + + We use the term "age_value" to denote the value of the Age header, in + a form appropriate for arithmetic operations. + + A response's age can be calculated in two entirely independent ways: + + 1. now minus date_value, if the local clock is reasonably well + synchronized to the origin server's clock. If the result is + negative, the result is replaced by zero. + + 2. age_value, if all of the caches along the response path + implement HTTP/1.1. 
+ + Given that we have two independent ways to compute the age of a + response when it is received, we can combine these as + + corrected_received_age = max(now - date_value, age_value) + + and as long as we have either nearly synchronized clocks or all- + HTTP/1.1 paths, one gets a reliable (conservative) result. + + Because of network-imposed delays, some significant interval might + pass between the time that a server generates a response and the time + it is received at the next outbound cache or client. If uncorrected, + this delay could result in improperly low ages. + + Because the request that resulted in the returned Age value must have + been initiated prior to that Age value's generation, we can correct + for delays imposed by the network by recording the time at which the + request was initiated. Then, when an Age value is received, it MUST + be interpreted relative to the time the request was initiated, not + + + + + +Fielding, et al. Standards Track [Page 81] + +RFC 2616 HTTP/1.1 June 1999 + + + the time that the response was received. This algorithm results in + conservative behavior no matter how much delay is experienced. So, we + compute: + + corrected_initial_age = corrected_received_age + + (now - request_time) + + where "request_time" is the time (according to the local clock) when + the request that elicited this response was sent. + + Summary of age calculation algorithm, when a cache receives a + response: + + /* + * age_value + * is the value of Age: header received by the cache with + * this response. 
+ * date_value + * is the value of the origin server's Date: header + * request_time + * is the (local) time when the cache made the request + * that resulted in this cached response + * response_time + * is the (local) time when the cache received the + * response + * now + * is the current (local) time + */ + + apparent_age = max(0, response_time - date_value); + corrected_received_age = max(apparent_age, age_value); + response_delay = response_time - request_time; + corrected_initial_age = corrected_received_age + response_delay; + resident_time = now - response_time; + current_age = corrected_initial_age + resident_time; + + The current_age of a cache entry is calculated by adding the amount + of time (in seconds) since the cache entry was last validated by the + origin server to the corrected_initial_age. When a response is + generated from a cache entry, the cache MUST include a single Age + header field in the response with a value equal to the cache entry's + current_age. + + The presence of an Age header field in a response implies that a + response is not first-hand. However, the converse is not true, since + the lack of an Age header field in a response does not imply that the + + + + + +Fielding, et al. Standards Track [Page 82] + +RFC 2616 HTTP/1.1 June 1999 + + + response is first-hand unless all caches along the request path are + compliant with HTTP/1.1 (i.e., older HTTP caches did not implement + the Age header field). + +13.2.4 Expiration Calculations + + In order to decide whether a response is fresh or stale, we need to + compare its freshness lifetime to its age. The age is calculated as + described in section 13.2.3; this section describes how to calculate + the freshness lifetime, and to determine if a response has expired. + In the discussion below, the values can be represented in any form + appropriate for arithmetic operations. + + We use the term "expires_value" to denote the value of the Expires + header. 
We use the term "max_age_value" to denote an appropriate + value of the number of seconds carried by the "max-age" directive of + the Cache-Control header in a response (see section 14.9.3). + + The max-age directive takes priority over Expires, so if max-age is + present in a response, the calculation is simply: + + freshness_lifetime = max_age_value + + Otherwise, if Expires is present in the response, the calculation is: + + freshness_lifetime = expires_value - date_value + + Note that neither of these calculations is vulnerable to clock skew, + since all of the information comes from the origin server. + + If none of Expires, Cache-Control: max-age, or Cache-Control: s- + maxage (see section 14.9.3) appears in the response, and the response + does not include other restrictions on caching, the cache MAY compute + a freshness lifetime using a heuristic. The cache MUST attach Warning + 113 to any response whose age is more than 24 hours if such warning + has not already been added. + + Also, if the response does have a Last-Modified time, the heuristic + expiration value SHOULD be no more than some fraction of the interval + since that time. A typical setting of this fraction might be 10%. + + The calculation to determine if a response has expired is quite + simple: + + response_is_fresh = (freshness_lifetime > current_age) + + + + + + +Fielding, et al. Standards Track [Page 83] + +RFC 2616 HTTP/1.1 June 1999 + + +13.2.5 Disambiguating Expiration Values + + Because expiration values are assigned optimistically, it is possible + for two caches to contain fresh values for the same resource that are + different. + + If a client performing a retrieval receives a non-first-hand response + for a request that was already fresh in its own cache, and the Date + header in its existing cache entry is newer than the Date on the new + response, then the client MAY ignore the response. 
If so, it MAY + retry the request with a "Cache-Control: max-age=0" directive (see + section 14.9), to force a check with the origin server. + + If a cache has two fresh responses for the same representation with + different validators, it MUST use the one with the more recent Date + header. This situation might arise because the cache is pooling + responses from other caches, or because a client has asked for a + reload or a revalidation of an apparently fresh cache entry. + +13.2.6 Disambiguating Multiple Responses + + Because a client might be receiving responses via multiple paths, so + that some responses flow through one set of caches and other + responses flow through a different set of caches, a client might + receive responses in an order different from that in which the origin + server sent them. We would like the client to use the most recently + generated response, even if older responses are still apparently + fresh. + + Neither the entity tag nor the expiration value can impose an + ordering on responses, since it is possible that a later response + intentionally carries an earlier expiration time. The Date values are + ordered to a granularity of one second. + + When a client tries to revalidate a cache entry, and the response it + receives contains a Date header that appears to be older than the one + for the existing entry, then the client SHOULD repeat the request + unconditionally, and include + + Cache-Control: max-age=0 + + to force any intermediate caches to validate their copies directly + with the origin server, or + + Cache-Control: no-cache + + to force any intermediate caches to obtain a new copy from the origin + server. + + + +Fielding, et al. Standards Track [Page 84] + +RFC 2616 HTTP/1.1 June 1999 + + + If the Date values are equal, then the client MAY use either response + (or MAY, if it is being extremely prudent, request a new response). 
+ Servers MUST NOT depend on clients being able to choose + deterministically between responses generated during the same second, + if their expiration times overlap. + +13.3 Validation Model + + When a cache has a stale entry that it would like to use as a + response to a client's request, it first has to check with the origin + server (or possibly an intermediate cache with a fresh response) to + see if its cached entry is still usable. We call this "validating" + the cache entry. Since we do not want to have to pay the overhead of + retransmitting the full response if the cached entry is good, and we + do not want to pay the overhead of an extra round trip if the cached + entry is invalid, the HTTP/1.1 protocol supports the use of + conditional methods. + + The key protocol features for supporting conditional methods are + those concerned with "cache validators." When an origin server + generates a full response, it attaches some sort of validator to it, + which is kept with the cache entry. When a client (user agent or + proxy cache) makes a conditional request for a resource for which it + has a cache entry, it includes the associated validator in the + request. + + The server then checks that validator against the current validator + for the entity, and, if they match (see section 13.3.3), it responds + with a special status code (usually, 304 (Not Modified)) and no + entity-body. Otherwise, it returns a full response (including + entity-body). Thus, we avoid transmitting the full response if the + validator matches, and we avoid an extra round trip if it does not + match. + + In HTTP/1.1, a conditional request looks exactly the same as a normal + request for the same resource, except that it carries a special + header (which includes the validator) that implicitly turns the + method (usually, GET) into a conditional. + + The protocol includes both positive and negative senses of cache- + validating conditions. 
That is, it is possible to request either that + a method be performed if and only if a validator matches or if and + only if no validators match. + + + + + + + + +Fielding, et al. Standards Track [Page 85] + +RFC 2616 HTTP/1.1 June 1999 + + + Note: a response that lacks a validator may still be cached, and + served from cache until it expires, unless this is explicitly + prohibited by a cache-control directive. However, a cache cannot + do a conditional retrieval if it does not have a validator for the + entity, which means it will not be refreshable after it expires. + +13.3.1 Last-Modified Dates + + The Last-Modified entity-header field value is often used as a cache + validator. In simple terms, a cache entry is considered to be valid + if the entity has not been modified since the Last-Modified value. + +13.3.2 Entity Tag Cache Validators + + The ETag response-header field value, an entity tag, provides for an + "opaque" cache validator. This might allow more reliable validation + in situations where it is inconvenient to store modification dates, + where the one-second resolution of HTTP date values is not + sufficient, or where the origin server wishes to avoid certain + paradoxes that might arise from the use of modification dates. + + Entity Tags are described in section 3.11. The headers used with + entity tags are described in sections 14.19, 14.24, 14.26 and 14.44. + +13.3.3 Weak and Strong Validators + + Since both origin servers and caches will compare two validators to + decide if they represent the same or different entities, one normally + would expect that if the entity (the entity-body or any entity- + headers) changes in any way, then the associated validator would + change as well. If this is true, then we call this validator a + "strong validator." + + However, there might be cases when a server prefers to change the + validator only on semantically significant changes, and not when + insignificant aspects of the entity change. 
A validator that does not + always change when the resource changes is a "weak validator." + + Entity tags are normally "strong validators," but the protocol + provides a mechanism to tag an entity tag as "weak." One can think of + a strong validator as one that changes whenever the bits of an entity + changes, while a weak value changes whenever the meaning of an entity + changes. Alternatively, one can think of a strong validator as part + of an identifier for a specific entity, while a weak validator is + part of an identifier for a set of semantically equivalent entities. + + Note: One example of a strong validator is an integer that is + incremented in stable storage every time an entity is changed. + + + +Fielding, et al. Standards Track [Page 86] + +RFC 2616 HTTP/1.1 June 1999 + + + An entity's modification time, if represented with one-second + resolution, could be a weak validator, since it is possible that + the resource might be modified twice during a single second. + + Support for weak validators is optional. However, weak validators + allow for more efficient caching of equivalent objects; for + example, a hit counter on a site is probably good enough if it is + updated every few days or weeks, and any value during that period + is likely "good enough" to be equivalent. + + A "use" of a validator is either when a client generates a request + and includes the validator in a validating header field, or when a + server compares two validators. + + Strong validators are usable in any context. Weak validators are only + usable in contexts that do not depend on exact equality of an entity. + For example, either kind is usable for a conditional GET of a full + entity. However, only a strong validator is usable for a sub-range + retrieval, since otherwise the client might end up with an internally + inconsistent entity. + + Clients MAY issue simple (non-subrange) GET requests with either weak + validators or strong validators. 
Clients MUST NOT use weak validators + in other forms of request. + + The only function that the HTTP/1.1 protocol defines on validators is + comparison. There are two validator comparison functions, depending + on whether the comparison context allows the use of weak validators + or not: + + - The strong comparison function: in order to be considered equal, + both validators MUST be identical in every way, and both MUST + NOT be weak. + + - The weak comparison function: in order to be considered equal, + both validators MUST be identical in every way, but either or + both of them MAY be tagged as "weak" without affecting the + result. + + An entity tag is strong unless it is explicitly tagged as weak. + Section 3.11 gives the syntax for entity tags. + + A Last-Modified time, when used as a validator in a request, is + implicitly weak unless it is possible to deduce that it is strong, + using the following rules: + + - The validator is being compared by an origin server to the + actual current validator for the entity and, + + + +Fielding, et al. Standards Track [Page 87] + +RFC 2616 HTTP/1.1 June 1999 + + + - That origin server reliably knows that the associated entity did + not change twice during the second covered by the presented + validator. + + or + + - The validator is about to be used by a client in an If- + Modified-Since or If-Unmodified-Since header, because the client + has a cache entry for the associated entity, and + + - That cache entry includes a Date value, which gives the time + when the origin server sent the original response, and + + - The presented Last-Modified time is at least 60 seconds before + the Date value. 
+ + or + + - The validator is being compared by an intermediate cache to the + validator stored in its cache entry for the entity, and + + - That cache entry includes a Date value, which gives the time + when the origin server sent the original response, and + + - The presented Last-Modified time is at least 60 seconds before + the Date value. + + This method relies on the fact that if two different responses were + sent by the origin server during the same second, but both had the + same Last-Modified time, then at least one of those responses would + have a Date value equal to its Last-Modified time. The arbitrary 60- + second limit guards against the possibility that the Date and Last- + Modified values are generated from different clocks, or at somewhat + different times during the preparation of the response. An + implementation MAY use a value larger than 60 seconds, if it is + believed that 60 seconds is too short. + + If a client wishes to perform a sub-range retrieval on a value for + which it has only a Last-Modified time and no opaque validator, it + MAY do this only if the Last-Modified time is strong in the sense + described here. + + A cache or origin server receiving a conditional request, other than + a full-body GET request, MUST use the strong comparison function to + evaluate the condition. + + These rules allow HTTP/1.1 caches and clients to safely perform sub- + range retrievals on values that have been obtained from HTTP/1.0 + + + +Fielding, et al. Standards Track [Page 88] + +RFC 2616 HTTP/1.1 June 1999 + + + servers. + +13.3.4 Rules for When to Use Entity Tags and Last-Modified Dates + + We adopt a set of rules and recommendations for origin servers, + clients, and caches regarding when various validator types ought to + be used, and for what purposes. + + HTTP/1.1 origin servers: + + - SHOULD send an entity tag validator unless it is not feasible to + generate one. 
+ + - MAY send a weak entity tag instead of a strong entity tag, if + performance considerations support the use of weak entity tags, + or if it is unfeasible to send a strong entity tag. + + - SHOULD send a Last-Modified value if it is feasible to send one, + unless the risk of a breakdown in semantic transparency that + could result from using this date in an If-Modified-Since header + would lead to serious problems. + + In other words, the preferred behavior for an HTTP/1.1 origin server + is to send both a strong entity tag and a Last-Modified value. + + In order to be legal, a strong entity tag MUST change whenever the + associated entity value changes in any way. A weak entity tag SHOULD + change whenever the associated entity changes in a semantically + significant way. + + Note: in order to provide semantically transparent caching, an + origin server must avoid reusing a specific strong entity tag + value for two different entities, or reusing a specific weak + entity tag value for two semantically different entities. Cache + entries might persist for arbitrarily long periods, regardless of + expiration times, so it might be inappropriate to expect that a + cache will never again attempt to validate an entry using a + validator that it obtained at some point in the past. + + HTTP/1.1 clients: + + - If an entity tag has been provided by the origin server, MUST + use that entity tag in any cache-conditional request (using If- + Match or If-None-Match). + + - If only a Last-Modified value has been provided by the origin + server, SHOULD use that value in non-subrange cache-conditional + requests (using If-Modified-Since). + + + +Fielding, et al. Standards Track [Page 89] + +RFC 2616 HTTP/1.1 June 1999 + + + - If only a Last-Modified value has been provided by an HTTP/1.0 + origin server, MAY use that value in subrange cache-conditional + requests (using If-Unmodified-Since:). The user agent SHOULD + provide a way to disable this, in case of difficulty. 
+ + - If both an entity tag and a Last-Modified value have been + provided by the origin server, SHOULD use both validators in + cache-conditional requests. This allows both HTTP/1.0 and + HTTP/1.1 caches to respond appropriately. + + An HTTP/1.1 origin server, upon receiving a conditional request that + includes both a Last-Modified date (e.g., in an If-Modified-Since or + If-Unmodified-Since header field) and one or more entity tags (e.g., + in an If-Match, If-None-Match, or If-Range header field) as cache + validators, MUST NOT return a response status of 304 (Not Modified) + unless doing so is consistent with all of the conditional header + fields in the request. + + An HTTP/1.1 caching proxy, upon receiving a conditional request that + includes both a Last-Modified date and one or more entity tags as + cache validators, MUST NOT return a locally cached response to the + client unless that cached response is consistent with all of the + conditional header fields in the request. + + Note: The general principle behind these rules is that HTTP/1.1 + servers and clients should transmit as much non-redundant + information as is available in their responses and requests. + HTTP/1.1 systems receiving this information will make the most + conservative assumptions about the validators they receive. + + HTTP/1.0 clients and caches will ignore entity tags. Generally, + last-modified values received or used by these systems will + support transparent and efficient caching, and so HTTP/1.1 origin + servers should provide Last-Modified values. In those rare cases + where the use of a Last-Modified value as a validator by an + HTTP/1.0 system could result in a serious problem, then HTTP/1.1 + origin servers should not provide one. 
+ +13.3.5 Non-validating Conditionals + + The principle behind entity tags is that only the service author + knows the semantics of a resource well enough to select an + appropriate cache validation mechanism, and the specification of any + validator comparison function more complex than byte-equality would + open up a can of worms. Thus, comparisons of any other headers + (except Last-Modified, for compatibility with HTTP/1.0) are never + used for purposes of validating a cache entry. + + + + +Fielding, et al. Standards Track [Page 90] + +RFC 2616 HTTP/1.1 June 1999 + + +13.4 Response Cacheability + + Unless specifically constrained by a cache-control (section 14.9) + directive, a caching system MAY always store a successful response + (see section 13.8) as a cache entry, MAY return it without validation + if it is fresh, and MAY return it after successful validation. If + there is neither a cache validator nor an explicit expiration time + associated with a response, we do not expect it to be cached, but + certain caches MAY violate this expectation (for example, when little + or no network connectivity is available). A client can usually detect + that such a response was taken from a cache by comparing the Date + header to the current time. + + Note: some HTTP/1.0 caches are known to violate this expectation + without providing any Warning. + + However, in some cases it might be inappropriate for a cache to + retain an entity, or to return it in response to a subsequent + request. This might be because absolute semantic transparency is + deemed necessary by the service author, or because of security or + privacy considerations. Certain cache-control directives are + therefore provided so that the server can indicate that certain + resource entities, or portions thereof, are not to be cached + regardless of other considerations. 
+ + Note that section 14.8 normally prevents a shared cache from saving + and returning a response to a previous request if that request + included an Authorization header. + + A response received with a status code of 200, 203, 206, 300, 301 or + 410 MAY be stored by a cache and used in reply to a subsequent + request, subject to the expiration mechanism, unless a cache-control + directive prohibits caching. However, a cache that does not support + the Range and Content-Range headers MUST NOT cache 206 (Partial + Content) responses. + + A response received with any other status code (e.g. status codes 302 + and 307) MUST NOT be returned in a reply to a subsequent request + unless there are cache-control directives or another header(s) that + explicitly allow it. For example, these include the following: an + Expires header (section 14.21); a "max-age", "s-maxage", "must- + revalidate", "proxy-revalidate", "public" or "private" cache-control + directive (section 14.9). + + + + + + + + +Fielding, et al. Standards Track [Page 91] + +RFC 2616 HTTP/1.1 June 1999 + + +13.5 Constructing Responses From Caches + + The purpose of an HTTP cache is to store information received in + response to requests for use in responding to future requests. In + many cases, a cache simply returns the appropriate parts of a + response to the requester. However, if the cache holds a cache entry + based on a previous response, it might have to combine parts of a new + response with what is held in the cache entry. + +13.5.1 End-to-end and Hop-by-hop Headers + + For the purpose of defining the behavior of caches and non-caching + proxies, we divide HTTP headers into two categories: + + - End-to-end headers, which are transmitted to the ultimate + recipient of a request or response. End-to-end headers in + responses MUST be stored as part of a cache entry and MUST be + transmitted in any response formed from a cache entry. 
+ + - Hop-by-hop headers, which are meaningful only for a single + transport-level connection, and are not stored by caches or + forwarded by proxies. + + The following HTTP/1.1 headers are hop-by-hop headers: + + - Connection + - Keep-Alive + - Proxy-Authenticate + - Proxy-Authorization + - TE + - Trailer + - Transfer-Encoding + - Upgrade + + All other headers defined by HTTP/1.1 are end-to-end headers. + + Other hop-by-hop headers MUST be listed in a Connection header + (section 14.10) to be introduced into HTTP/1.1 (or later). + +13.5.2 Non-modifiable Headers + + Some features of the HTTP/1.1 protocol, such as Digest + Authentication, depend on the value of certain end-to-end headers. A + transparent proxy SHOULD NOT modify an end-to-end header unless the + definition of that header requires or specifically allows that. + + + + + + +Fielding, et al. Standards Track [Page 92] + +RFC 2616 HTTP/1.1 June 1999 + + + A transparent proxy MUST NOT modify any of the following fields in a + request or response, and it MUST NOT add any of these fields if not + already present: + + - Content-Location + + - Content-MD5 + + - ETag + + - Last-Modified + + A transparent proxy MUST NOT modify any of the following fields in a + response: + + - Expires + + but it MAY add any of these fields if not already present. If an + Expires header is added, it MUST be given a field-value identical to + that of the Date header in that response. + + A proxy MUST NOT modify or add any of the following fields in a + message that contains the no-transform cache-control directive, or in + any request: + + - Content-Encoding + + - Content-Range + + - Content-Type + + A non-transparent proxy MAY modify or add these fields to a message + that does not include no-transform, but if it does so, it MUST add a + Warning 214 (Transformation applied) if one does not already appear + in the message (see section 14.46).
+ + Warning: unnecessary modification of end-to-end headers might + cause authentication failures if stronger authentication + mechanisms are introduced in later versions of HTTP. Such + authentication mechanisms MAY rely on the values of header fields + not listed here. + + The Content-Length field of a request or response is added or deleted + according to the rules in section 4.4. A transparent proxy MUST + preserve the entity-length (section 7.2.2) of the entity-body, + although it MAY change the transfer-length (section 4.4). + + + + + +Fielding, et al. Standards Track [Page 93] + +RFC 2616 HTTP/1.1 June 1999 + + +13.5.3 Combining Headers + + When a cache makes a validating request to a server, and the server + provides a 304 (Not Modified) response or a 206 (Partial Content) + response, the cache then constructs a response to send to the + requesting client. + + If the status code is 304 (Not Modified), the cache uses the entity- + body stored in the cache entry as the entity-body of this outgoing + response. If the status code is 206 (Partial Content) and the ETag or + Last-Modified headers match exactly, the cache MAY combine the + contents stored in the cache entry with the new contents received in + the response and use the result as the entity-body of this outgoing + response, (see 13.5.4). + + The end-to-end headers stored in the cache entry are used for the + constructed response, except that + + - any stored Warning headers with warn-code 1xx (see section + 14.46) MUST be deleted from the cache entry and the forwarded + response. + + - any stored Warning headers with warn-code 2xx MUST be retained + in the cache entry and the forwarded response. + + - any end-to-end headers provided in the 304 or 206 response MUST + replace the corresponding headers from the cache entry. 
+ + Unless the cache decides to remove the cache entry, it MUST also + replace the end-to-end headers stored with the cache entry with + corresponding headers received in the incoming response, except for + Warning headers as described immediately above. If a header field- + name in the incoming response matches more than one header in the + cache entry, all such old headers MUST be replaced. + + In other words, the set of end-to-end headers received in the + incoming response overrides all corresponding end-to-end headers + stored with the cache entry (except for stored Warning headers with + warn-code 1xx, which are deleted even if not overridden). + + Note: this rule allows an origin server to use a 304 (Not + Modified) or a 206 (Partial Content) response to update any header + associated with a previous response for the same entity or sub- + ranges thereof, although it might not always be meaningful or + correct to do so. This rule does not allow an origin server to use + a 304 (Not Modified) or a 206 (Partial Content) response to + entirely delete a header that it had provided with a previous + response. + + + +Fielding, et al. Standards Track [Page 94] + +RFC 2616 HTTP/1.1 June 1999 + + +13.5.4 Combining Byte Ranges + + A response might transfer only a subrange of the bytes of an entity- + body, either because the request included one or more Range + specifications, or because a connection was broken prematurely. After + several such transfers, a cache might have received several ranges of + the same entity-body. + + If a cache has a stored non-empty set of subranges for an entity, and + an incoming response transfers another subrange, the cache MAY + combine the new subrange with the existing set if both the following + conditions are met: + + - Both the incoming response and the cache entry have a cache + validator. + + - The two cache validators match using the strong comparison + function (see section 13.3.3). 
+ + If either requirement is not met, the cache MUST use only the most + recent partial response (based on the Date values transmitted with + every response, and using the incoming response if these values are + equal or missing), and MUST discard the other partial information. + +13.6 Caching Negotiated Responses + + Use of server-driven content negotiation (section 12.1), as indicated + by the presence of a Vary header field in a response, alters the + conditions and procedure by which a cache can use the response for + subsequent requests. See section 14.44 for use of the Vary header + field by servers. + + A server SHOULD use the Vary header field to inform a cache of what + request-header fields were used to select among multiple + representations of a cacheable response subject to server-driven + negotiation. The set of header fields named by the Vary field value + is known as the "selecting" request-headers. + + When the cache receives a subsequent request whose Request-URI + specifies one or more cache entries including a Vary header field, + the cache MUST NOT use such a cache entry to construct a response to + the new request unless all of the selecting request-headers present + in the new request match the corresponding stored request-headers in + the original request. + + The selecting request-headers from two requests are defined to match + if and only if the selecting request-headers in the first request can + be transformed to the selecting request-headers in the second request + + + +Fielding, et al. Standards Track [Page 95] + +RFC 2616 HTTP/1.1 June 1999 + + + by adding or removing linear white space (LWS) at places where this + is allowed by the corresponding BNF, and/or combining multiple + message-header fields with the same field name following the rules + about message headers in section 4.2. 
+ + A Vary header field-value of "*" always fails to match and subsequent + requests on that resource can only be properly interpreted by the + origin server. + + If the selecting request header fields for the cached entry do not + match the selecting request header fields of the new request, then + the cache MUST NOT use a cached entry to satisfy the request unless + it first relays the new request to the origin server in a conditional + request and the server responds with 304 (Not Modified), including an + entity tag or Content-Location that indicates the entity to be used. + + If an entity tag was assigned to a cached representation, the + forwarded request SHOULD be conditional and include the entity tags + in an If-None-Match header field from all its cache entries for the + resource. This conveys to the server the set of entities currently + held by the cache, so that if any one of these entities matches the + requested entity, the server can use the ETag header field in its 304 + (Not Modified) response to tell the cache which entry is appropriate. + If the entity-tag of the new response matches that of an existing + entry, the new response SHOULD be used to update the header fields of + the existing entry, and the result MUST be returned to the client. + + If any of the existing cache entries contains only partial content + for the associated entity, its entity-tag SHOULD NOT be included in + the If-None-Match header field unless the request is for a range that + would be fully satisfied by that entry. + + If a cache receives a successful response whose Content-Location + field matches that of an existing cache entry for the same Request- + URI, whose entity-tag differs from that of the existing entry, and + whose Date is more recent than that of the existing entry, the + existing entry SHOULD NOT be returned in response to future requests + and SHOULD be deleted from the cache.
+ +13.7 Shared and Non-Shared Caches + + For reasons of security and privacy, it is necessary to make a + distinction between "shared" and "non-shared" caches. A non-shared + cache is one that is accessible only to a single user. Accessibility + in this case SHOULD be enforced by appropriate security mechanisms. + All other caches are considered to be "shared." Other sections of + + + + + +Fielding, et al. Standards Track [Page 96] + +RFC 2616 HTTP/1.1 June 1999 + + + this specification place certain constraints on the operation of + shared caches in order to prevent loss of privacy or failure of + access controls. + +13.8 Errors or Incomplete Response Cache Behavior + + A cache that receives an incomplete response (for example, with fewer + bytes of data than specified in a Content-Length header) MAY store + the response. However, the cache MUST treat this as a partial + response. Partial responses MAY be combined as described in section + 13.5.4; the result might be a full response or might still be + partial. A cache MUST NOT return a partial response to a client + without explicitly marking it as such, using the 206 (Partial + Content) status code. A cache MUST NOT return a partial response + using a status code of 200 (OK). + + If a cache receives a 5xx response while attempting to revalidate an + entry, it MAY either forward this response to the requesting client, + or act as if the server failed to respond. In the latter case, it MAY + return a previously received response unless the cached entry + includes the "must-revalidate" cache-control directive (see section + 14.9). + +13.9 Side Effects of GET and HEAD + + Unless the origin server explicitly prohibits the caching of their + responses, the application of GET and HEAD methods to any resources + SHOULD NOT have side effects that would lead to erroneous behavior if + these responses are taken from a cache. 
They MAY still have side + effects, but a cache is not required to consider such side effects in + its caching decisions. Caches are always expected to observe an + origin server's explicit restrictions on caching. + + We note one exception to this rule: since some applications have + traditionally used GETs and HEADs with query URLs (those containing a + "?" in the rel_path part) to perform operations with significant side + effects, caches MUST NOT treat responses to such URIs as fresh unless + the server provides an explicit expiration time. This specifically + means that responses from HTTP/1.0 servers for such URIs SHOULD NOT + be taken from a cache. See section 9.1.1 for related information. + +13.10 Invalidation After Updates or Deletions + + The effect of certain methods performed on a resource at the origin + server might cause one or more existing cache entries to become non- + transparently invalid. That is, although they might continue to be + "fresh," they do not accurately reflect what the origin server would + return for a new request on that resource. + + + +Fielding, et al. Standards Track [Page 97] + +RFC 2616 HTTP/1.1 June 1999 + + + There is no way for the HTTP protocol to guarantee that all such + cache entries are marked invalid. For example, the request that + caused the change at the origin server might not have gone through + the proxy where a cache entry is stored. However, several rules help + reduce the likelihood of erroneous behavior. + + In this section, the phrase "invalidate an entity" means that the + cache will either remove all instances of that entity from its + storage, or will mark these as "invalid" and in need of a mandatory + revalidation before they can be returned in response to a subsequent + request. + + Some HTTP methods MUST cause a cache to invalidate an entity. This is + either the entity referred to by the Request-URI, or by the Location + or Content-Location headers (if present). 
These methods are: + + - PUT + + - DELETE + + - POST + + In order to prevent denial of service attacks, an invalidation based + on the URI in a Location or Content-Location header MUST only be + performed if the host part is the same as in the Request-URI. + + A cache that passes through requests for methods it does not + understand SHOULD invalidate any entities referred to by the + Request-URI. + +13.11 Write-Through Mandatory + + All methods that might be expected to cause modifications to the + origin server's resources MUST be written through to the origin + server. This currently includes all methods except for GET and HEAD. + A cache MUST NOT reply to such a request from a client before having + transmitted the request to the inbound server, and having received a + corresponding response from the inbound server. This does not prevent + a proxy cache from sending a 100 (Continue) response before the + inbound server has sent its final reply. + + The alternative (known as "write-back" or "copy-back" caching) is not + allowed in HTTP/1.1, due to the difficulty of providing consistent + updates and the problems arising from server, cache, or network + failure prior to write-back. + + + + + + +Fielding, et al. Standards Track [Page 98] + +RFC 2616 HTTP/1.1 June 1999 + + +13.12 Cache Replacement + + If a new cacheable (see sections 14.9.2, 13.2.5, 13.2.6 and 13.8) + response is received from a resource while any existing responses for + the same resource are cached, the cache SHOULD use the new response + to reply to the current request. It MAY insert it into cache storage + and MAY, if it meets all other requirements, use it to respond to any + future requests that would previously have caused the old response to + be returned. If it inserts the new response into cache storage the + rules in section 13.5.3 apply. + + Note: a new response that has an older Date header value than + existing cached responses is not cacheable. 
+ +13.13 History Lists + + User agents often have history mechanisms, such as "Back" buttons and + history lists, which can be used to redisplay an entity retrieved + earlier in a session. + + History mechanisms and caches are different. In particular history + mechanisms SHOULD NOT try to show a semantically transparent view of + the current state of a resource. Rather, a history mechanism is meant + to show exactly what the user saw at the time when the resource was + retrieved. + + By default, an expiration time does not apply to history mechanisms. + If the entity is still in storage, a history mechanism SHOULD display + it even if the entity has expired, unless the user has specifically + configured the agent to refresh expired history documents. + + This is not to be construed to prohibit the history mechanism from + telling the user that a view might be stale. + + Note: if history list mechanisms unnecessarily prevent users from + viewing stale resources, this will tend to force service authors + to avoid using HTTP expiration controls and cache controls when + they would otherwise like to. Service authors may consider it + important that users not be presented with error messages or + warning messages when they use navigation controls (such as BACK) + to view previously fetched resources. Even though sometimes such + resources ought not to be cached, or ought to expire quickly, user + interface considerations may force service authors to resort to + other means of preventing caching (e.g. "once-only" URLs) in order + not to suffer the effects of improperly functioning history + mechanisms. + + + + + +Fielding, et al. Standards Track [Page 99] + +RFC 2616 HTTP/1.1 June 1999 + + +14 Header Field Definitions + + This section defines the syntax and semantics of all standard + HTTP/1.1 header fields. For entity-header fields, both sender and + recipient refer to either the client or the server, depending on who + sends and who receives the entity. 
+ +14.1 Accept + + The Accept request-header field can be used to specify certain media + types which are acceptable for the response. Accept headers can be + used to indicate that the request is specifically limited to a small + set of desired types, as in the case of a request for an in-line + image. + + Accept = "Accept" ":" + #( media-range [ accept-params ] ) + + media-range = ( "*/*" + | ( type "/" "*" ) + | ( type "/" subtype ) + ) *( ";" parameter ) + accept-params = ";" "q" "=" qvalue *( accept-extension ) + accept-extension = ";" token [ "=" ( token | quoted-string ) ] + + The asterisk "*" character is used to group media types into ranges, + with "*/*" indicating all media types and "type/*" indicating all + subtypes of that type. The media-range MAY include media type + parameters that are applicable to that range. + + Each media-range MAY be followed by one or more accept-params, + beginning with the "q" parameter for indicating a relative quality + factor. The first "q" parameter (if any) separates the media-range + parameter(s) from the accept-params. Quality factors allow the user + or user agent to indicate the relative degree of preference for that + media-range, using the qvalue scale from 0 to 1 (section 3.9). The + default value is q=1. + + Note: Use of the "q" parameter name to separate media type + parameters from Accept extension parameters is due to historical + practice. Although this prevents any media type parameter named + "q" from being used with a media range, such an event is believed + to be unlikely given the lack of any "q" parameters in the IANA + media type registry and the rare usage of any media type + parameters in Accept. Future media types are discouraged from + registering any parameter named "q". + + + + + +Fielding, et al. 
Standards Track [Page 100] + +RFC 2616 HTTP/1.1 June 1999 + + + The example + + Accept: audio/*; q=0.2, audio/basic + + SHOULD be interpreted as "I prefer audio/basic, but send me any audio + type if it is the best available after an 80% mark-down in quality." + + If no Accept header field is present, then it is assumed that the + client accepts all media types. If an Accept header field is present, + and if the server cannot send a response which is acceptable + according to the combined Accept field value, then the server SHOULD + send a 406 (not acceptable) response. + + A more elaborate example is + + Accept: text/plain; q=0.5, text/html, + text/x-dvi; q=0.8, text/x-c + + Verbally, this would be interpreted as "text/html and text/x-c are + the preferred media types, but if they do not exist, then send the + text/x-dvi entity, and if that does not exist, send the text/plain + entity." + + Media ranges can be overridden by more specific media ranges or + specific media types. If more than one media range applies to a given + type, the most specific reference has precedence. For example, + + Accept: text/*, text/html, text/html;level=1, */* + + have the following precedence: + + 1) text/html;level=1 + 2) text/html + 3) text/* + 4) */* + + The media type quality factor associated with a given type is + determined by finding the media range with the highest precedence + which matches that type. For example, + + Accept: text/*;q=0.3, text/html;q=0.7, text/html;level=1, + text/html;level=2;q=0.4, */*;q=0.5 + + would cause the following values to be associated: + + text/html;level=1 = 1 + text/html = 0.7 + text/plain = 0.3 + + + +Fielding, et al. Standards Track [Page 101] + +RFC 2616 HTTP/1.1 June 1999 + + + image/jpeg = 0.5 + text/html;level=2 = 0.4 + text/html;level=3 = 0.7 + + Note: A user agent might be provided with a default set of quality + values for certain media ranges. 
However, unless the user agent is + a closed system which cannot interact with other rendering agents, + this default set ought to be configurable by the user. + +14.2 Accept-Charset + + The Accept-Charset request-header field can be used to indicate what + character sets are acceptable for the response. This field allows + clients capable of understanding more comprehensive or special- + purpose character sets to signal that capability to a server which is + capable of representing documents in those character sets. + + Accept-Charset = "Accept-Charset" ":" + 1#( ( charset | "*" )[ ";" "q" "=" qvalue ] ) + + + Character set values are described in section 3.4. Each charset MAY + be given an associated quality value which represents the user's + preference for that charset. The default value is q=1. An example is + + Accept-Charset: iso-8859-5, unicode-1-1;q=0.8 + + The special value "*", if present in the Accept-Charset field, + matches every character set (including ISO-8859-1) which is not + mentioned elsewhere in the Accept-Charset field. If no "*" is present + in an Accept-Charset field, then all character sets not explicitly + mentioned get a quality value of 0, except for ISO-8859-1, which gets + a quality value of 1 if not explicitly mentioned. + + If no Accept-Charset header is present, the default is that any + character set is acceptable. If an Accept-Charset header is present, + and if the server cannot send a response which is acceptable + according to the Accept-Charset header, then the server SHOULD send + an error response with the 406 (not acceptable) status code, though + the sending of an unacceptable response is also allowed. + +14.3 Accept-Encoding + + The Accept-Encoding request-header field is similar to Accept, but + restricts the content-codings (section 3.5) that are acceptable in + the response. + + Accept-Encoding = "Accept-Encoding" ":" + + + +Fielding, et al. 
Standards Track [Page 102] + +RFC 2616 HTTP/1.1 June 1999 + + + 1#( codings [ ";" "q" "=" qvalue ] ) + codings = ( content-coding | "*" ) + + Examples of its use are: + + Accept-Encoding: compress, gzip + Accept-Encoding: + Accept-Encoding: * + Accept-Encoding: compress;q=0.5, gzip;q=1.0 + Accept-Encoding: gzip;q=1.0, identity; q=0.5, *;q=0 + + A server tests whether a content-coding is acceptable, according to + an Accept-Encoding field, using these rules: + + 1. If the content-coding is one of the content-codings listed in + the Accept-Encoding field, then it is acceptable, unless it is + accompanied by a qvalue of 0. (As defined in section 3.9, a + qvalue of 0 means "not acceptable.") + + 2. The special "*" symbol in an Accept-Encoding field matches any + available content-coding not explicitly listed in the header + field. + + 3. If multiple content-codings are acceptable, then the acceptable + content-coding with the highest non-zero qvalue is preferred. + + 4. The "identity" content-coding is always acceptable, unless + specifically refused because the Accept-Encoding field includes + "identity;q=0", or because the field includes "*;q=0" and does + not explicitly include the "identity" content-coding. If the + Accept-Encoding field-value is empty, then only the "identity" + encoding is acceptable. + + If an Accept-Encoding field is present in a request, and if the + server cannot send a response which is acceptable according to the + Accept-Encoding header, then the server SHOULD send an error response + with the 406 (Not Acceptable) status code. + + If no Accept-Encoding field is present in a request, the server MAY + assume that the client will accept any content coding. In this case, + if "identity" is one of the available content-codings, then the + server SHOULD use the "identity" content-coding, unless it has + additional information that a different content-coding is meaningful + to the client. 
+ + Note: If the request does not include an Accept-Encoding field, + and if the "identity" content-coding is unavailable, then + content-codings commonly understood by HTTP/1.0 clients (i.e., + + + +Fielding, et al. Standards Track [Page 103] + +RFC 2616 HTTP/1.1 June 1999 + + + "gzip" and "compress") are preferred; some older clients + improperly display messages sent with other content-codings. The + server might also make this decision based on information about + the particular user-agent or client. + + Note: Most HTTP/1.0 applications do not recognize or obey qvalues + associated with content-codings. This means that qvalues will not + work and are not permitted with x-gzip or x-compress. + +14.4 Accept-Language + + The Accept-Language request-header field is similar to Accept, but + restricts the set of natural languages that are preferred as a + response to the request. Language tags are defined in section 3.10. + + Accept-Language = "Accept-Language" ":" + 1#( language-range [ ";" "q" "=" qvalue ] ) + language-range = ( ( 1*8ALPHA *( "-" 1*8ALPHA ) ) | "*" ) + + Each language-range MAY be given an associated quality value which + represents an estimate of the user's preference for the languages + specified by that range. The quality value defaults to "q=1". For + example, + + Accept-Language: da, en-gb;q=0.8, en;q=0.7 + + would mean: "I prefer Danish, but will accept British English and + other types of English." A language-range matches a language-tag if + it exactly equals the tag, or if it exactly equals a prefix of the + tag such that the first tag character following the prefix is "-". + The special range "*", if present in the Accept-Language field, + matches every tag not matched by any other range present in the + Accept-Language field. 
+ + Note: This use of a prefix matching rule does not imply that + language tags are assigned to languages in such a way that it is + always true that if a user understands a language with a certain + tag, then this user will also understand all languages with tags + for which this tag is a prefix. The prefix rule simply allows the + use of prefix tags if this is the case. + + The language quality factor assigned to a language-tag by the + Accept-Language field is the quality value of the longest language- + range in the field that matches the language-tag. If no language- + range in the field matches the tag, the language quality factor + assigned is 0. If no Accept-Language header is present in the + request, the server + + + + +Fielding, et al. Standards Track [Page 104] + +RFC 2616 HTTP/1.1 June 1999 + + + SHOULD assume that all languages are equally acceptable. If an + Accept-Language header is present, then all languages which are + assigned a quality factor greater than 0 are acceptable. + + It might be contrary to the privacy expectations of the user to send + an Accept-Language header with the complete linguistic preferences of + the user in every request. For a discussion of this issue, see + section 15.1.4. + + As intelligibility is highly dependent on the individual user, it is + recommended that client applications make the choice of linguistic + preference available to the user. If the choice is not made + available, then the Accept-Language header field MUST NOT be given in + the request. + + Note: When making the choice of linguistic preference available to + the user, we remind implementors of the fact that users are not + familiar with the details of language matching as described above, + and should provide appropriate guidance. As an example, users + might assume that on selecting "en-gb", they will be served any + kind of English document if British English is not available. 
A + user agent might suggest in such a case to add "en" to get the + best matching behavior. + +14.5 Accept-Ranges + + The Accept-Ranges response-header field allows the server to + indicate its acceptance of range requests for a resource: + + Accept-Ranges = "Accept-Ranges" ":" acceptable-ranges + acceptable-ranges = 1#range-unit | "none" + + Origin servers that accept byte-range requests MAY send + + Accept-Ranges: bytes + + but are not required to do so. Clients MAY generate byte-range + requests without having received this header for the resource + involved. Range units are defined in section 3.12. + + Servers that do not accept any kind of range request for a + resource MAY send + + Accept-Ranges: none + + to advise the client not to attempt a range request. + + + + + +Fielding, et al. Standards Track [Page 105] + +RFC 2616 HTTP/1.1 June 1999 + + +14.6 Age + + The Age response-header field conveys the sender's estimate of the + amount of time since the response (or its revalidation) was + generated at the origin server. A cached response is "fresh" if + its age does not exceed its freshness lifetime. Age values are + calculated as specified in section 13.2.3. + + Age = "Age" ":" age-value + age-value = delta-seconds + + Age values are non-negative decimal integers, representing time in + seconds. + + If a cache receives a value larger than the largest positive + integer it can represent, or if any of its age calculations + overflows, it MUST transmit an Age header with a value of + 2147483648 (2^31). An HTTP/1.1 server that includes a cache MUST + include an Age header field in every response generated from its + own cache. Caches SHOULD use an arithmetic type of at least 31 + bits of range. + +14.7 Allow + + The Allow entity-header field lists the set of methods supported + by the resource identified by the Request-URI. The purpose of this + field is strictly to inform the recipient of valid methods + associated with the resource. 
An Allow header field MUST be + present in a 405 (Method Not Allowed) response. + + Allow = "Allow" ":" #Method + + Example of use: + + Allow: GET, HEAD, PUT + + This field cannot prevent a client from trying other methods. + However, the indications given by the Allow header field value + SHOULD be followed. The actual set of allowed methods is defined + by the origin server at the time of each request. + + The Allow header field MAY be provided with a PUT request to + recommend the methods to be supported by the new or modified + resource. The server is not required to support these methods and + SHOULD include an Allow header in the response giving the actual + supported methods. + + + + + +Fielding, et al. Standards Track [Page 106] + +RFC 2616 HTTP/1.1 June 1999 + + + A proxy MUST NOT modify the Allow header field even if it does not + understand all the methods specified, since the user agent might + have other means of communicating with the origin server. + +14.8 Authorization + + A user agent that wishes to authenticate itself with a server-- + usually, but not necessarily, after receiving a 401 response--does + so by including an Authorization request-header field with the + request. The Authorization field value consists of credentials + containing the authentication information of the user agent for + the realm of the resource being requested. + + Authorization = "Authorization" ":" credentials + + HTTP access authentication is described in "HTTP Authentication: + Basic and Digest Access Authentication" [43]. If a request is + authenticated and a realm specified, the same credentials SHOULD + be valid for all other requests within this realm (assuming that + the authentication scheme itself does not require otherwise, such + as credentials that vary according to a challenge value or using + synchronized clocks). 
+ + When a shared cache (see section 13.7) receives a request + containing an Authorization field, it MUST NOT return the + corresponding response as a reply to any other request, unless one + of the following specific exceptions holds: + + 1. If the response includes the "s-maxage" cache-control + directive, the cache MAY use that response in replying to a + subsequent request. But (if the specified maximum age has + passed) a proxy cache MUST first revalidate it with the origin + server, using the request-headers from the new request to allow + the origin server to authenticate the new request. (This is the + defined behavior for s-maxage.) If the response includes "s- + maxage=0", the proxy MUST always revalidate it before re-using + it. + + 2. If the response includes the "must-revalidate" cache-control + directive, the cache MAY use that response in replying to a + subsequent request. But if the response is stale, all caches + MUST first revalidate it with the origin server, using the + request-headers from the new request to allow the origin server + to authenticate the new request. + + 3. If the response includes the "public" cache-control directive, + it MAY be returned in reply to any subsequent request. + + + + +Fielding, et al. Standards Track [Page 107] + +RFC 2616 HTTP/1.1 June 1999 + + +14.9 Cache-Control + + The Cache-Control general-header field is used to specify directives + that MUST be obeyed by all caching mechanisms along the + request/response chain. The directives specify behavior intended to + prevent caches from adversely interfering with the request or + response. These directives typically override the default caching + algorithms. Cache directives are unidirectional in that the presence + of a directive in a request does not imply that the same directive is + to be given in the response. + + Note that HTTP/1.0 caches might not implement Cache-Control and + might only implement Pragma: no-cache (see section 14.32). 
+ + Cache directives MUST be passed through by a proxy or gateway + application, regardless of their significance to that application, + since the directives might be applicable to all recipients along the + request/response chain. It is not possible to specify a cache- + directive for a specific cache. + + Cache-Control = "Cache-Control" ":" 1#cache-directive + + cache-directive = cache-request-directive + | cache-response-directive + + cache-request-directive = + "no-cache" ; Section 14.9.1 + | "no-store" ; Section 14.9.2 + | "max-age" "=" delta-seconds ; Section 14.9.3, 14.9.4 + | "max-stale" [ "=" delta-seconds ] ; Section 14.9.3 + | "min-fresh" "=" delta-seconds ; Section 14.9.3 + | "no-transform" ; Section 14.9.5 + | "only-if-cached" ; Section 14.9.4 + | cache-extension ; Section 14.9.6 + + cache-response-directive = + "public" ; Section 14.9.1 + | "private" [ "=" <"> 1#field-name <"> ] ; Section 14.9.1 + | "no-cache" [ "=" <"> 1#field-name <"> ]; Section 14.9.1 + | "no-store" ; Section 14.9.2 + | "no-transform" ; Section 14.9.5 + | "must-revalidate" ; Section 14.9.4 + | "proxy-revalidate" ; Section 14.9.4 + | "max-age" "=" delta-seconds ; Section 14.9.3 + | "s-maxage" "=" delta-seconds ; Section 14.9.3 + | cache-extension ; Section 14.9.6 + + cache-extension = token [ "=" ( token | quoted-string ) ] + + + +Fielding, et al. Standards Track [Page 108] + +RFC 2616 HTTP/1.1 June 1999 + + + When a directive appears without any 1#field-name parameter, the + directive applies to the entire request or response. When such a + directive appears with a 1#field-name parameter, it applies only to + the named field or fields, and not to the rest of the request or + response. This mechanism supports extensibility; implementations of + future versions of the HTTP protocol might apply these directives to + header fields not defined in HTTP/1.1. 
+ + The cache-control directives can be broken down into these general + categories: + + - Restrictions on what are cacheable; these may only be imposed by + the origin server. + + - Restrictions on what may be stored by a cache; these may be + imposed by either the origin server or the user agent. + + - Modifications of the basic expiration mechanism; these may be + imposed by either the origin server or the user agent. + + - Controls over cache revalidation and reload; these may only be + imposed by a user agent. + + - Control over transformation of entities. + + - Extensions to the caching system. + +14.9.1 What is Cacheable + + By default, a response is cacheable if the requirements of the + request method, request header fields, and the response status + indicate that it is cacheable. Section 13.4 summarizes these defaults + for cacheability. The following Cache-Control response directives + allow an origin server to override the default cacheability of a + response: + + public + Indicates that the response MAY be cached by any cache, even if it + would normally be non-cacheable or cacheable only within a non- + shared cache. (See also Authorization, section 14.8, for + additional details.) + + private + Indicates that all or part of the response message is intended for + a single user and MUST NOT be cached by a shared cache. This + allows an origin server to state that the specified parts of the + + + + + +Fielding, et al. Standards Track [Page 109] + +RFC 2616 HTTP/1.1 June 1999 + + + response are intended for only one user and are not a valid + response for requests by other users. A private (non-shared) cache + MAY cache the response. + + Note: This usage of the word private only controls where the + response may be cached, and cannot ensure the privacy of the + message content. 
+ + no-cache + If the no-cache directive does not specify a field-name, then a + cache MUST NOT use the response to satisfy a subsequent request + without successful revalidation with the origin server. This + allows an origin server to prevent caching even by caches that + have been configured to return stale responses to client requests. + + If the no-cache directive does specify one or more field-names, + then a cache MAY use the response to satisfy a subsequent request, + subject to any other restrictions on caching. However, the + specified field-name(s) MUST NOT be sent in the response to a + subsequent request without successful revalidation with the origin + server. This allows an origin server to prevent the re-use of + certain header fields in a response, while still allowing caching + of the rest of the response. + + Note: Most HTTP/1.0 caches will not recognize or obey this + directive. + +14.9.2 What May be Stored by Caches + + no-store + The purpose of the no-store directive is to prevent the + inadvertent release or retention of sensitive information (for + example, on backup tapes). The no-store directive applies to the + entire message, and MAY be sent either in a response or in a + request. If sent in a request, a cache MUST NOT store any part of + either this request or any response to it. If sent in a response, + a cache MUST NOT store any part of either this response or the + request that elicited it. This directive applies to both non- + shared and shared caches. "MUST NOT store" in this context means + that the cache MUST NOT intentionally store the information in + non-volatile storage, and MUST make a best-effort attempt to + remove the information from volatile storage as promptly as + possible after forwarding it. + + Even when this directive is associated with a response, users + might explicitly store such a response outside of the caching + system (e.g., with a "Save As" dialog). 
History buffers MAY store + such responses as part of their normal operation. + + + +Fielding, et al. Standards Track [Page 110] + +RFC 2616 HTTP/1.1 June 1999 + + + The purpose of this directive is to meet the stated requirements + of certain users and service authors who are concerned about + accidental releases of information via unanticipated accesses to + cache data structures. While the use of this directive might + improve privacy in some cases, we caution that it is NOT in any + way a reliable or sufficient mechanism for ensuring privacy. In + particular, malicious or compromised caches might not recognize or + obey this directive, and communications networks might be + vulnerable to eavesdropping. + +14.9.3 Modifications of the Basic Expiration Mechanism + + The expiration time of an entity MAY be specified by the origin + server using the Expires header (see section 14.21). Alternatively, + it MAY be specified using the max-age directive in a response. When + the max-age cache-control directive is present in a cached response, + the response is stale if its current age is greater than the age + value given (in seconds) at the time of a new request for that + resource. The max-age directive on a response implies that the + response is cacheable (i.e., "public") unless some other, more + restrictive cache directive is also present. + + If a response includes both an Expires header and a max-age + directive, the max-age directive overrides the Expires header, even + if the Expires header is more restrictive. This rule allows an origin + server to provide, for a given response, a longer expiration time to + an HTTP/1.1 (or later) cache than to an HTTP/1.0 cache. This might be + useful if certain HTTP/1.0 caches improperly calculate ages or + expiration times, perhaps due to desynchronized clocks. 
+ + Many HTTP/1.0 cache implementations will treat an Expires value that + is less than or equal to the response Date value as being equivalent + to the Cache-Control response directive "no-cache". If an HTTP/1.1 + cache receives such a response, and the response does not include a + Cache-Control header field, it SHOULD consider the response to be + non-cacheable in order to retain compatibility with HTTP/1.0 servers. + + Note: An origin server might wish to use a relatively new HTTP + cache control feature, such as the "private" directive, on a + network including older caches that do not understand that + feature. The origin server will need to combine the new feature + with an Expires field whose value is less than or equal to the + Date value. This will prevent older caches from improperly + caching the response. + + + + + + + +Fielding, et al. Standards Track [Page 111] + +RFC 2616 HTTP/1.1 June 1999 + + + s-maxage + If a response includes an s-maxage directive, then for a shared + cache (but not for a private cache), the maximum age specified by + this directive overrides the maximum age specified by either the + max-age directive or the Expires header. The s-maxage directive + also implies the semantics of the proxy-revalidate directive (see + section 14.9.4), i.e., that the shared cache must not use the + entry after it becomes stale to respond to a subsequent request + without first revalidating it with the origin server. The s- + maxage directive is always ignored by a private cache. + + Note that most older caches, not compliant with this specification, + do not implement any cache-control directives. An origin server + wishing to use a cache-control directive that restricts, but does not + prevent, caching by an HTTP/1.1-compliant cache MAY exploit the + requirement that the max-age directive overrides the Expires header, + and the fact that pre-HTTP/1.1-compliant caches do not observe the + max-age directive. 
+ + Other directives allow a user agent to modify the basic expiration + mechanism. These directives MAY be specified on a request: + + max-age + Indicates that the client is willing to accept a response whose + age is no greater than the specified time in seconds. Unless max- + stale directive is also included, the client is not willing to + accept a stale response. + + min-fresh + Indicates that the client is willing to accept a response whose + freshness lifetime is no less than its current age plus the + specified time in seconds. That is, the client wants a response + that will still be fresh for at least the specified number of + seconds. + + max-stale + Indicates that the client is willing to accept a response that has + exceeded its expiration time. If max-stale is assigned a value, + then the client is willing to accept a response that has exceeded + its expiration time by no more than the specified number of + seconds. If no value is assigned to max-stale, then the client is + willing to accept a stale response of any age. + + If a cache returns a stale response, either because of a max-stale + directive on a request, or because the cache is configured to + override the expiration time of a response, the cache MUST attach a + Warning header to the stale response, using Warning 110 (Response is + stale). + + + +Fielding, et al. Standards Track [Page 112] + +RFC 2616 HTTP/1.1 June 1999 + + + A cache MAY be configured to return stale responses without + validation, but only if this does not conflict with any "MUST"-level + requirements concerning cache validation (e.g., a "must-revalidate" + cache-control directive). + + If both the new request and the cached entry include "max-age" + directives, then the lesser of the two values is used for determining + the freshness of the cached entry for that request. 
+ +14.9.4 Cache Revalidation and Reload Controls + + Sometimes a user agent might want or need to insist that a cache + revalidate its cache entry with the origin server (and not just with + the next cache along the path to the origin server), or to reload its + cache entry from the origin server. End-to-end revalidation might be + necessary if either the cache or the origin server has overestimated + the expiration time of the cached response. End-to-end reload may be + necessary if the cache entry has become corrupted for some reason. + + End-to-end revalidation may be requested either when the client does + not have its own local cached copy, in which case we call it + "unspecified end-to-end revalidation", or when the client does have a + local cached copy, in which case we call it "specific end-to-end + revalidation." + + The client can specify these three kinds of action using Cache- + Control request directives: + + End-to-end reload + The request includes a "no-cache" cache-control directive or, for + compatibility with HTTP/1.0 clients, "Pragma: no-cache". Field + names MUST NOT be included with the no-cache directive in a + request. The server MUST NOT use a cached copy when responding to + such a request. + + Specific end-to-end revalidation + The request includes a "max-age=0" cache-control directive, which + forces each cache along the path to the origin server to + revalidate its own entry, if any, with the next cache or server. + The initial request includes a cache-validating conditional with + the client's current validator. + + Unspecified end-to-end revalidation + The request includes "max-age=0" cache-control directive, which + forces each cache along the path to the origin server to + revalidate its own entry, if any, with the next cache or server. + The initial request does not include a cache-validating + + + + +Fielding, et al. 
Standards Track [Page 113] + +RFC 2616 HTTP/1.1 June 1999 + + + conditional; the first cache along the path (if any) that holds a + cache entry for this resource includes a cache-validating + conditional with its current validator. + + max-age + When an intermediate cache is forced, by means of a max-age=0 + directive, to revalidate its own cache entry, and the client has + supplied its own validator in the request, the supplied validator + might differ from the validator currently stored with the cache + entry. In this case, the cache MAY use either validator in making + its own request without affecting semantic transparency. + + However, the choice of validator might affect performance. The + best approach is for the intermediate cache to use its own + validator when making its request. If the server replies with 304 + (Not Modified), then the cache can return its now validated copy + to the client with a 200 (OK) response. If the server replies with + a new entity and cache validator, however, the intermediate cache + can compare the returned validator with the one provided in the + client's request, using the strong comparison function. If the + client's validator is equal to the origin server's, then the + intermediate cache simply returns 304 (Not Modified). Otherwise, + it returns the new entity with a 200 (OK) response. + + If a request includes the no-cache directive, it SHOULD NOT + include min-fresh, max-stale, or max-age. + + only-if-cached + In some cases, such as times of extremely poor network + connectivity, a client may want a cache to return only those + responses that it currently has stored, and not to reload or + revalidate with the origin server. To do this, the client may + include the only-if-cached directive in a request. If it receives + this directive, a cache SHOULD either respond using a cached entry + that is consistent with the other constraints of the request, or + respond with a 504 (Gateway Timeout) status. 
However, if a group + of caches is being operated as a unified system with good internal + connectivity, such a request MAY be forwarded within that group of + caches. + + must-revalidate + Because a cache MAY be configured to ignore a server's specified + expiration time, and because a client request MAY include a max- + stale directive (which has a similar effect), the protocol also + includes a mechanism for the origin server to require revalidation + of a cache entry on any subsequent use. When the must-revalidate + directive is present in a response received by a cache, that cache + MUST NOT use the entry after it becomes stale to respond to a + + + +Fielding, et al. Standards Track [Page 114] + +RFC 2616 HTTP/1.1 June 1999 + + + subsequent request without first revalidating it with the origin + server. (I.e., the cache MUST do an end-to-end revalidation every + time, if, based solely on the origin server's Expires or max-age + value, the cached response is stale.) + + The must-revalidate directive is necessary to support reliable + operation for certain protocol features. In all circumstances an + HTTP/1.1 cache MUST obey the must-revalidate directive; in + particular, if the cache cannot reach the origin server for any + reason, it MUST generate a 504 (Gateway Timeout) response. + + Servers SHOULD send the must-revalidate directive if and only if + failure to revalidate a request on the entity could result in + incorrect operation, such as a silently unexecuted financial + transaction. Recipients MUST NOT take any automated action that + violates this directive, and MUST NOT automatically provide an + unvalidated copy of the entity if revalidation fails. + + Although this is not recommended, user agents operating under + severe connectivity constraints MAY violate this directive but, if + so, MUST explicitly warn the user that an unvalidated response has + been provided. 
The warning MUST be provided on each unvalidated + access, and SHOULD require explicit user confirmation. + + proxy-revalidate + The proxy-revalidate directive has the same meaning as the must- + revalidate directive, except that it does not apply to non-shared + user agent caches. It can be used on a response to an + authenticated request to permit the user's cache to store and + later return the response without needing to revalidate it (since + it has already been authenticated once by that user), while still + requiring proxies that service many users to revalidate each time + (in order to make sure that each user has been authenticated). + Note that such authenticated responses also need the public cache + control directive in order to allow them to be cached at all. + +14.9.5 No-Transform Directive + + no-transform + Implementors of intermediate caches (proxies) have found it useful + to convert the media type of certain entity bodies. A non- + transparent proxy might, for example, convert between image + formats in order to save cache space or to reduce the amount of + traffic on a slow link. + + Serious operational problems occur, however, when these + transformations are applied to entity bodies intended for certain + kinds of applications. For example, applications for medical + + + +Fielding, et al. Standards Track [Page 115] + +RFC 2616 HTTP/1.1 June 1999 + + + imaging, scientific data analysis and those using end-to-end + authentication, all depend on receiving an entity body that is bit + for bit identical to the original entity-body. + + Therefore, if a message includes the no-transform directive, an + intermediate cache or proxy MUST NOT change those headers that are + listed in section 13.5.2 as being subject to the no-transform + directive. This implies that the cache or proxy MUST NOT change + any aspect of the entity-body that is specified by these headers, + including the value of the entity-body itself. 
+ +14.9.6 Cache Control Extensions + + The Cache-Control header field can be extended through the use of one + or more cache-extension tokens, each with an optional assigned value. + Informational extensions (those which do not require a change in + cache behavior) MAY be added without changing the semantics of other + directives. Behavioral extensions are designed to work by acting as + modifiers to the existing base of cache directives. Both the new + directive and the standard directive are supplied, such that + applications which do not understand the new directive will default + to the behavior specified by the standard directive, and those that + understand the new directive will recognize it as modifying the + requirements associated with the standard directive. In this way, + extensions to the cache-control directives can be made without + requiring changes to the base protocol. + + This extension mechanism depends on an HTTP cache obeying all of the + cache-control directives defined for its native HTTP-version, obeying + certain extensions, and ignoring all directives that it does not + understand. + + For example, consider a hypothetical new response directive called + community which acts as a modifier to the private directive. We + define this new directive to mean that, in addition to any non-shared + cache, any cache which is shared only by members of the community + named within its value may cache the response. An origin server + wishing to allow the UCI community to use an otherwise private + response in their shared cache(s) could do so by including + + Cache-Control: private, community="UCI" + + A cache seeing this header field will act correctly even if the cache + does not understand the community cache-extension, since it will also + see and understand the private directive and thus default to the safe + behavior. + + + + + +Fielding, et al. 
Standards Track [Page 116] + +RFC 2616 HTTP/1.1 June 1999 + + + Unrecognized cache-directives MUST be ignored; it is assumed that any + cache-directive likely to be unrecognized by an HTTP/1.1 cache will + be combined with standard directives (or the response's default + cacheability) such that the cache behavior will remain minimally + correct even if the cache does not understand the extension(s). + +14.10 Connection + + The Connection general-header field allows the sender to specify + options that are desired for that particular connection and MUST NOT + be communicated by proxies over further connections. + + The Connection header has the following grammar: + + Connection = "Connection" ":" 1#(connection-token) + connection-token = token + + HTTP/1.1 proxies MUST parse the Connection header field before a + message is forwarded and, for each connection-token in this field, + remove any header field(s) from the message with the same name as the + connection-token. Connection options are signaled by the presence of + a connection-token in the Connection header field, not by any + corresponding additional header field(s), since the additional header + field may not be sent if there are no parameters associated with that + connection option. + + Message headers listed in the Connection header MUST NOT include + end-to-end headers, such as Cache-Control. + + HTTP/1.1 defines the "close" connection option for the sender to + signal that the connection will be closed after completion of the + response. For example, + + Connection: close + + in either the request or the response header fields indicates that + the connection SHOULD NOT be considered `persistent' (section 8.1) + after the current request/response is complete. + + HTTP/1.1 applications that do not support persistent connections MUST + include the "close" connection option in every message. 
+ + A system receiving an HTTP/1.0 (or lower-version) message that + includes a Connection header MUST, for each connection-token in this + field, remove and ignore any header field(s) from the message with + the same name as the connection-token. This protects against mistaken + forwarding of such header fields by pre-HTTP/1.1 proxies. See section + 19.6.2. + + + +Fielding, et al. Standards Track [Page 117] + +RFC 2616 HTTP/1.1 June 1999 + + +14.11 Content-Encoding + + The Content-Encoding entity-header field is used as a modifier to the + media-type. When present, its value indicates what additional content + codings have been applied to the entity-body, and thus what decoding + mechanisms must be applied in order to obtain the media-type + referenced by the Content-Type header field. Content-Encoding is + primarily used to allow a document to be compressed without losing + the identity of its underlying media type. + + Content-Encoding = "Content-Encoding" ":" 1#content-coding + + Content codings are defined in section 3.5. An example of its use is + + Content-Encoding: gzip + + The content-coding is a characteristic of the entity identified by + the Request-URI. Typically, the entity-body is stored with this + encoding and is only decoded before rendering or analogous usage. + However, a non-transparent proxy MAY modify the content-coding if the + new coding is known to be acceptable to the recipient, unless the + "no-transform" cache-control directive is present in the message. + + If the content-coding of an entity is not "identity", then the + response MUST include a Content-Encoding entity-header (section + 14.11) that lists the non-identity content-coding(s) used. + + If the content-coding of an entity in a request message is not + acceptable to the origin server, the server SHOULD respond with a + status code of 415 (Unsupported Media Type). 
+ + If multiple encodings have been applied to an entity, the content + codings MUST be listed in the order in which they were applied. + Additional information about the encoding parameters MAY be provided + by other entity-header fields not defined by this specification. + +14.12 Content-Language + + The Content-Language entity-header field describes the natural + language(s) of the intended audience for the enclosed entity. Note + that this might not be equivalent to all the languages used within + the entity-body. + + Content-Language = "Content-Language" ":" 1#language-tag + + + + + + + +Fielding, et al. Standards Track [Page 118] + +RFC 2616 HTTP/1.1 June 1999 + + + Language tags are defined in section 3.10. The primary purpose of + Content-Language is to allow a user to identify and differentiate + entities according to the user's own preferred language. Thus, if the + body content is intended only for a Danish-literate audience, the + appropriate field is + + Content-Language: da + + If no Content-Language is specified, the default is that the content + is intended for all language audiences. This might mean that the + sender does not consider it to be specific to any natural language, + or that the sender does not know for which language it is intended. + + Multiple languages MAY be listed for content that is intended for + multiple audiences. For example, a rendition of the "Treaty of + Waitangi," presented simultaneously in the original Maori and English + versions, would call for + + Content-Language: mi, en + + However, just because multiple languages are present within an entity + does not mean that it is intended for multiple linguistic audiences. + An example would be a beginner's language primer, such as "A First + Lesson in Latin," which is clearly intended to be used by an + English-literate audience. In this case, the Content-Language would + properly only include "en". 
+ + Content-Language MAY be applied to any media type -- it is not + limited to textual documents. + +14.13 Content-Length + + The Content-Length entity-header field indicates the size of the + entity-body, in decimal number of OCTETs, sent to the recipient or, + in the case of the HEAD method, the size of the entity-body that + would have been sent had the request been a GET. + + Content-Length = "Content-Length" ":" 1*DIGIT + + An example is + + Content-Length: 3495 + + Applications SHOULD use this field to indicate the transfer-length of + the message-body, unless this is prohibited by the rules in section + 4.4. + + + + + +Fielding, et al. Standards Track [Page 119] + +RFC 2616 HTTP/1.1 June 1999 + + + Any Content-Length greater than or equal to zero is a valid value. + Section 4.4 describes how to determine the length of a message-body + if a Content-Length is not given. + + Note that the meaning of this field is significantly different from + the corresponding definition in MIME, where it is an optional field + used within the "message/external-body" content-type. In HTTP, it + SHOULD be sent whenever the message's length can be determined prior + to being transferred, unless this is prohibited by the rules in + section 4.4. + +14.14 Content-Location + + The Content-Location entity-header field MAY be used to supply the + resource location for the entity enclosed in the message when that + entity is accessible from a location separate from the requested + resource's URI. A server SHOULD provide a Content-Location for the + variant corresponding to the response entity; especially in the case + where a resource has multiple entities associated with it, and those + entities actually have separate locations by which they might be + individually accessed, the server SHOULD provide a Content-Location + for the particular variant which is returned. 
+ + Content-Location = "Content-Location" ":" + ( absoluteURI | relativeURI ) + + The value of Content-Location also defines the base URI for the + entity. + + The Content-Location value is not a replacement for the original + requested URI; it is only a statement of the location of the resource + corresponding to this particular entity at the time of the request. + Future requests MAY specify the Content-Location URI as the request- + URI if the desire is to identify the source of that particular + entity. + + A cache cannot assume that an entity with a Content-Location + different from the URI used to retrieve it can be used to respond to + later requests on that Content-Location URI. However, the Content- + Location can be used to differentiate between multiple entities + retrieved from a single requested resource, as described in section + 13.6. + + If the Content-Location is a relative URI, the relative URI is + interpreted relative to the Request-URI. + + The meaning of the Content-Location header in PUT or POST requests is + undefined; servers are free to ignore it in those cases. + + + +Fielding, et al. Standards Track [Page 120] + +RFC 2616 HTTP/1.1 June 1999 + + +14.15 Content-MD5 + + The Content-MD5 entity-header field, as defined in RFC 1864 [23], is + an MD5 digest of the entity-body for the purpose of providing an + end-to-end message integrity check (MIC) of the entity-body. (Note: a + MIC is good for detecting accidental modification of the entity-body + in transit, but is not proof against malicious attacks.) + + Content-MD5 = "Content-MD5" ":" md5-digest + md5-digest = <base64 of 128 bit MD5 digest as per RFC 1864> + + The Content-MD5 header field MAY be generated by an origin server or + client to function as an integrity check of the entity-body. Only + origin servers or clients MAY generate the Content-MD5 header field; + proxies and gateways MUST NOT generate it, as this would defeat its + value as an end-to-end integrity check. 
Any recipient of the entity- + body, including gateways and proxies, MAY check that the digest value + in this header field matches that of the entity-body as received. + + The MD5 digest is computed based on the content of the entity-body, + including any content-coding that has been applied, but not including + any transfer-encoding applied to the message-body. If the message is + received with a transfer-encoding, that encoding MUST be removed + prior to checking the Content-MD5 value against the received entity. + + This has the result that the digest is computed on the octets of the + entity-body exactly as, and in the order that, they would be sent if + no transfer-encoding were being applied. + + HTTP extends RFC 1864 to permit the digest to be computed for MIME + composite media-types (e.g., multipart/* and message/rfc822), but + this does not change how the digest is computed as defined in the + preceding paragraph. + + There are several consequences of this. The entity-body for composite + types MAY contain many body-parts, each with its own MIME and HTTP + headers (including Content-MD5, Content-Transfer-Encoding, and + Content-Encoding headers). If a body-part has a Content-Transfer- + Encoding or Content-Encoding header, it is assumed that the content + of the body-part has had the encoding applied, and the body-part is + included in the Content-MD5 digest as is -- i.e., after the + application. The Transfer-Encoding header field is not allowed within + body-parts. + + Conversion of all line breaks to CRLF MUST NOT be done before + computing or checking the digest: the line break convention used in + the text actually transmitted MUST be left unaltered when computing + the digest. + + + +Fielding, et al. 
Standards Track [Page 121] + +RFC 2616 HTTP/1.1 June 1999 + + + Note: while the definition of Content-MD5 is exactly the same for + HTTP as in RFC 1864 for MIME entity-bodies, there are several ways + in which the application of Content-MD5 to HTTP entity-bodies + differs from its application to MIME entity-bodies. One is that + HTTP, unlike MIME, does not use Content-Transfer-Encoding, and + does use Transfer-Encoding and Content-Encoding. Another is that + HTTP more frequently uses binary content types than MIME, so it is + worth noting that, in such cases, the byte order used to compute + the digest is the transmission byte order defined for the type. + Lastly, HTTP allows transmission of text types with any of several + line break conventions and not just the canonical form using CRLF. + +14.16 Content-Range + + The Content-Range entity-header is sent with a partial entity-body to + specify where in the full entity-body the partial body should be + applied. Range units are defined in section 3.12. + + Content-Range = "Content-Range" ":" content-range-spec + + content-range-spec = byte-content-range-spec + byte-content-range-spec = bytes-unit SP + byte-range-resp-spec "/" + ( instance-length | "*" ) + + byte-range-resp-spec = (first-byte-pos "-" last-byte-pos) + | "*" + instance-length = 1*DIGIT + + The header SHOULD indicate the total length of the full entity-body, + unless this length is unknown or difficult to determine. The asterisk + "*" character means that the instance-length is unknown at the time + when the response was generated. + + Unlike byte-ranges-specifier values (see section 14.35.1), a byte- + range-resp-spec MUST only specify one range, and MUST contain + absolute byte positions for both the first and last byte of the + range. 
+ + A byte-content-range-spec with a byte-range-resp-spec whose last- + byte-pos value is less than its first-byte-pos value, or whose + instance-length value is less than or equal to its last-byte-pos + value, is invalid. The recipient of an invalid byte-content-range- + spec MUST ignore it and any content transferred along with it. + + A server sending a response with status code 416 (Requested range not + satisfiable) SHOULD include a Content-Range field with a byte-range- + resp-spec of "*". The instance-length specifies the current length of + + + +Fielding, et al. Standards Track [Page 122] + +RFC 2616 HTTP/1.1 June 1999 + + + the selected resource. A response with status code 206 (Partial + Content) MUST NOT include a Content-Range field with a byte-range- + resp-spec of "*". + + Examples of byte-content-range-spec values, assuming that the entity + contains a total of 1234 bytes: + + . The first 500 bytes: + bytes 0-499/1234 + + . The second 500 bytes: + bytes 500-999/1234 + + . All except for the first 500 bytes: + bytes 500-1233/1234 + + . The last 500 bytes: + bytes 734-1233/1234 + + When an HTTP message includes the content of a single range (for + example, a response to a request for a single range, or to a request + for a set of ranges that overlap without any holes), this content is + transmitted with a Content-Range header, and a Content-Length header + showing the number of bytes actually transferred. For example, + + HTTP/1.1 206 Partial content + Date: Wed, 15 Nov 1995 06:25:24 GMT + Last-Modified: Wed, 15 Nov 1995 04:58:08 GMT + Content-Range: bytes 21010-47021/47022 + Content-Length: 26012 + Content-Type: image/gif + + When an HTTP message includes the content of multiple ranges (for + example, a response to a request for multiple non-overlapping + ranges), these are transmitted as a multipart message. The multipart + media type used for this purpose is "multipart/byteranges" as defined + in appendix 19.2. 
See appendix 19.6.3 for a compatibility issue. + + A response to a request for a single range MUST NOT be sent using the + multipart/byteranges media type. A response to a request for + multiple ranges, whose result is a single range, MAY be sent as a + multipart/byteranges media type with one part. A client that cannot + decode a multipart/byteranges message MUST NOT ask for multiple + byte-ranges in a single request. + + When a client requests multiple byte-ranges in one request, the + server SHOULD return them in the order that they appeared in the + request. + + + +Fielding, et al. Standards Track [Page 123] + +RFC 2616 HTTP/1.1 June 1999 + + + If the server ignores a byte-range-spec because it is syntactically + invalid, the server SHOULD treat the request as if the invalid Range + header field did not exist. (Normally, this means return a 200 + response containing the full entity). + + If the server receives a request (other than one including an If- + Range request-header field) with an unsatisfiable Range request- + header field (that is, all of whose byte-range-spec values have a + first-byte-pos value greater than the current length of the selected + resource), it SHOULD return a response code of 416 (Requested range + not satisfiable) (section 10.4.17). + + Note: clients cannot depend on servers to send a 416 (Requested + range not satisfiable) response instead of a 200 (OK) response for + an unsatisfiable Range request-header, since not all servers + implement this request-header. + +14.17 Content-Type + + The Content-Type entity-header field indicates the media type of the + entity-body sent to the recipient or, in the case of the HEAD method, + the media type that would have been sent had the request been a GET. + + Content-Type = "Content-Type" ":" media-type + + Media types are defined in section 3.7. 
An example of the field is + + Content-Type: text/html; charset=ISO-8859-4 + + Further discussion of methods for identifying the media type of an + entity is provided in section 7.2.1. + +14.18 Date + + The Date general-header field represents the date and time at which + the message was originated, having the same semantics as orig-date in + RFC 822. The field value is an HTTP-date, as described in section + 3.3.1; it MUST be sent in RFC 1123 [8]-date format. + + Date = "Date" ":" HTTP-date + + An example is + + Date: Tue, 15 Nov 1994 08:12:31 GMT + + Origin servers MUST include a Date header field in all responses, + except in these cases: + + + + +Fielding, et al. Standards Track [Page 124] + +RFC 2616 HTTP/1.1 June 1999 + + + 1. If the response status code is 100 (Continue) or 101 (Switching + Protocols), the response MAY include a Date header field, at + the server's option. + + 2. If the response status code conveys a server error, e.g. 500 + (Internal Server Error) or 503 (Service Unavailable), and it is + inconvenient or impossible to generate a valid Date. + + 3. If the server does not have a clock that can provide a + reasonable approximation of the current time, its responses + MUST NOT include a Date header field. In this case, the rules + in section 14.18.1 MUST be followed. + + A received message that does not have a Date header field MUST be + assigned one by the recipient if the message will be cached by that + recipient or gatewayed via a protocol which requires a Date. An HTTP + implementation without a clock MUST NOT cache responses without + revalidating them on every use. An HTTP cache, especially a shared + cache, SHOULD use a mechanism, such as NTP [28], to synchronize its + clock with a reliable external standard. + + Clients SHOULD only send a Date header field in messages that include + an entity-body, as in the case of the PUT and POST requests, and even + then it is optional. 
A client without a clock MUST NOT send a Date + header field in a request. + + The HTTP-date sent in a Date header SHOULD NOT represent a date and + time subsequent to the generation of the message. It SHOULD represent + the best available approximation of the date and time of message + generation, unless the implementation has no means of generating a + reasonably accurate date and time. In theory, the date ought to + represent the moment just before the entity is generated. In + practice, the date can be generated at any time during the message + origination without affecting its semantic value. + +14.18.1 Clockless Origin Server Operation + + Some origin server implementations might not have a clock available. + An origin server without a clock MUST NOT assign Expires or Last- + Modified values to a response, unless these values were associated + with the resource by a system or user with a reliable clock. It MAY + assign an Expires value that is known, at or before server + configuration time, to be in the past (this allows "pre-expiration" + of responses without storing separate Expires values for each + resource). + + + + + + +Fielding, et al. Standards Track [Page 125] + +RFC 2616 HTTP/1.1 June 1999 + + +14.19 ETag + + The ETag response-header field provides the current value of the + entity tag for the requested variant. The headers used with entity + tags are described in sections 14.24, 14.26 and 14.44. The entity tag + MAY be used for comparison with other entities from the same resource + (see section 13.3.3). + + ETag = "ETag" ":" entity-tag + + Examples: + + ETag: "xyzzy" + ETag: W/"xyzzy" + ETag: "" + +14.20 Expect + + The Expect request-header field is used to indicate that particular + server behaviors are required by the client. 
+ + Expect = "Expect" ":" 1#expectation + + expectation = "100-continue" | expectation-extension + expectation-extension = token [ "=" ( token | quoted-string ) + *expect-params ] + expect-params = ";" token [ "=" ( token | quoted-string ) ] + + + A server that does not understand or is unable to comply with any of + the expectation values in the Expect field of a request MUST respond + with appropriate error status. The server MUST respond with a 417 + (Expectation Failed) status if any of the expectations cannot be met + or, if there are other problems with the request, some other 4xx + status. + + This header field is defined with extensible syntax to allow for + future extensions. If a server receives a request containing an + Expect field that includes an expectation-extension that it does not + support, it MUST respond with a 417 (Expectation Failed) status. + + Comparison of expectation values is case-insensitive for unquoted + tokens (including the 100-continue token), and is case-sensitive for + quoted-string expectation-extensions. + + + + + + + +Fielding, et al. Standards Track [Page 126] + +RFC 2616 HTTP/1.1 June 1999 + + + The Expect mechanism is hop-by-hop: that is, an HTTP/1.1 proxy MUST + return a 417 (Expectation Failed) status if it receives a request + with an expectation that it cannot meet. However, the Expect + request-header itself is end-to-end; it MUST be forwarded if the + request is forwarded. + + Many older HTTP/1.0 and HTTP/1.1 applications do not understand the + Expect header. + + See section 8.2.3 for the use of the 100 (continue) status. + +14.21 Expires + + The Expires entity-header field gives the date/time after which the + response is considered stale. A stale cache entry may not normally be + returned by a cache (either a proxy cache or a user agent cache) + unless it is first validated with the origin server (or with an + intermediate cache that has a fresh copy of the entity). 
See section + 13.2 for further discussion of the expiration model. + + The presence of an Expires field does not imply that the original + resource will change or cease to exist at, before, or after that + time. + + The format is an absolute date and time as defined by HTTP-date in + section 3.3.1; it MUST be in RFC 1123 date format: + + Expires = "Expires" ":" HTTP-date + + An example of its use is + + Expires: Thu, 01 Dec 1994 16:00:00 GMT + + Note: if a response includes a Cache-Control field with the max- + age directive (see section 14.9.3), that directive overrides the + Expires field. + + HTTP/1.1 clients and caches MUST treat other invalid date formats, + especially including the value "0", as in the past (i.e., "already + expired"). + + To mark a response as "already expired," an origin server sends an + Expires date that is equal to the Date header value. (See the rules + for expiration calculations in section 13.2.4.) + + + + + + + +Fielding, et al. Standards Track [Page 127] + +RFC 2616 HTTP/1.1 June 1999 + + + To mark a response as "never expires," an origin server sends an + Expires date approximately one year from the time the response is + sent. HTTP/1.1 servers SHOULD NOT send Expires dates more than one + year in the future. + + The presence of an Expires header field with a date value of some + time in the future on a response that otherwise would by default be + non-cacheable indicates that the response is cacheable, unless + indicated otherwise by a Cache-Control header field (section 14.9). + +14.22 From + + The From request-header field, if given, SHOULD contain an Internet + e-mail address for the human user who controls the requesting user + agent. 
The address SHOULD be machine-usable, as defined by "mailbox" + in RFC 822 [9] as updated by RFC 1123 [8]: + + From = "From" ":" mailbox + + An example is: + + From: webmaster@w3.org + + This header field MAY be used for logging purposes and as a means for + identifying the source of invalid or unwanted requests. It SHOULD NOT + be used as an insecure form of access protection. The interpretation + of this field is that the request is being performed on behalf of the + person given, who accepts responsibility for the method performed. In + particular, robot agents SHOULD include this header so that the + person responsible for running the robot can be contacted if problems + occur on the receiving end. + + The Internet e-mail address in this field MAY be separate from the + Internet host which issued the request. For example, when a request + is passed through a proxy the original issuer's address SHOULD be + used. + + The client SHOULD NOT send the From header field without the user's + approval, as it might conflict with the user's privacy interests or + their site's security policy. It is strongly recommended that the + user be able to disable, enable, and modify the value of this field + at any time prior to a request. + +14.23 Host + + The Host request-header field specifies the Internet host and port + number of the resource being requested, as obtained from the original + URI given by the user or referring resource (generally an HTTP URL, + + + +Fielding, et al. Standards Track [Page 128] + +RFC 2616 HTTP/1.1 June 1999 + + + as described in section 3.2.2). The Host field value MUST represent + the naming authority of the origin server or gateway given by the + original URL. This allows the origin server or gateway to + differentiate between internally-ambiguous URLs, such as the root "/" + URL of a server for multiple host names on a single IP address. 
+ + Host = "Host" ":" host [ ":" port ] ; Section 3.2.2 + + A "host" without any trailing port information implies the default + port for the service requested (e.g., "80" for an HTTP URL). For + example, a request on the origin server for + http://www.w3.org/pub/WWW/ would properly include: + + GET /pub/WWW/ HTTP/1.1 + Host: www.w3.org + + A client MUST include a Host header field in all HTTP/1.1 request + messages. If the requested URI does not include an Internet host + name for the service being requested, then the Host header field MUST + be given with an empty value. An HTTP/1.1 proxy MUST ensure that any + request message it forwards does contain an appropriate Host header + field that identifies the service being requested by the proxy. All + Internet-based HTTP/1.1 servers MUST respond with a 400 (Bad Request) + status code to any HTTP/1.1 request message which lacks a Host header + field. + + See sections 5.2 and 19.6.1.1 for other requirements relating to + Host. + +14.24 If-Match + + The If-Match request-header field is used with a method to make it + conditional. A client that has one or more entities previously + obtained from the resource can verify that one of those entities is + current by including a list of their associated entity tags in the + If-Match header field. Entity tags are defined in section 3.11. The + purpose of this feature is to allow efficient updates of cached + information with a minimum amount of transaction overhead. It is also + used, on updating requests, to prevent inadvertent modification of + the wrong version of a resource. As a special case, the value "*" + matches any current entity of the resource. + + If-Match = "If-Match" ":" ( "*" | 1#entity-tag ) + + If any of the entity tags match the entity tag of the entity that + would have been returned in the response to a similar GET request + (without the If-Match header) on that resource, or if "*" is given + + + + +Fielding, et al.
Standards Track [Page 129] + +RFC 2616 HTTP/1.1 June 1999 + + + and any current entity exists for that resource, then the server MAY + perform the requested method as if the If-Match header field did not + exist. + + A server MUST use the strong comparison function (see section 13.3.3) + to compare the entity tags in If-Match. + + If none of the entity tags match, or if "*" is given and no current + entity exists, the server MUST NOT perform the requested method, and + MUST return a 412 (Precondition Failed) response. This behavior is + most useful when the client wants to prevent an updating method, such + as PUT, from modifying a resource that has changed since the client + last retrieved it. + + If the request would, without the If-Match header field, result in + anything other than a 2xx or 412 status, then the If-Match header + MUST be ignored. + + The meaning of "If-Match: *" is that the method SHOULD be performed + if the representation selected by the origin server (or by a cache, + possibly using the Vary mechanism, see section 14.44) exists, and + MUST NOT be performed if the representation does not exist. + + A request intended to update a resource (e.g., a PUT) MAY include an + If-Match header field to signal that the request method MUST NOT be + applied if the entity corresponding to the If-Match value (a single + entity tag) is no longer a representation of that resource. This + allows the user to indicate that they do not wish the request to be + successful if the resource has been changed without their knowledge. + Examples: + + If-Match: "xyzzy" + If-Match: "xyzzy", "r2d2xxxx", "c3piozzzz" + If-Match: * + + The result of a request having both an If-Match header field and + either an If-None-Match or an If-Modified-Since header fields is + undefined by this specification. 
+ +14.25 If-Modified-Since + + The If-Modified-Since request-header field is used with a method to + make it conditional: if the requested variant has not been modified + since the time specified in this field, an entity will not be + returned from the server; instead, a 304 (not modified) response will + be returned without any message-body. + + If-Modified-Since = "If-Modified-Since" ":" HTTP-date + + + +Fielding, et al. Standards Track [Page 130] + +RFC 2616 HTTP/1.1 June 1999 + + + An example of the field is: + + If-Modified-Since: Sat, 29 Oct 1994 19:43:31 GMT + + A GET method with an If-Modified-Since header and no Range header + requests that the identified entity be transferred only if it has + been modified since the date given by the If-Modified-Since header. + The algorithm for determining this includes the following cases: + + a) If the request would normally result in anything other than a + 200 (OK) status, or if the passed If-Modified-Since date is + invalid, the response is exactly the same as for a normal GET. + A date which is later than the server's current time is + invalid. + + b) If the variant has been modified since the If-Modified-Since + date, the response is exactly the same as for a normal GET. + + c) If the variant has not been modified since a valid If- + Modified-Since date, the server SHOULD return a 304 (Not + Modified) response. + + The purpose of this feature is to allow efficient updates of cached + information with a minimum amount of transaction overhead. + + Note: The Range request-header field modifies the meaning of If- + Modified-Since; see section 14.35 for full details. + + Note: If-Modified-Since times are interpreted by the server, whose + clock might not be synchronized with the client. + + Note: When handling an If-Modified-Since header field, some + servers will use an exact date comparison function, rather than a + less-than function, for deciding whether to send a 304 (Not + Modified) response. 
To get best results when sending an If- + Modified-Since header field for cache validation, clients are + advised to use the exact date string received in a previous Last- + Modified header field whenever possible. + + Note: If a client uses an arbitrary date in the If-Modified-Since + header instead of a date taken from the Last-Modified header for + the same request, the client should be aware of the fact that this + date is interpreted in the server's understanding of time. The + client should consider unsynchronized clocks and rounding problems + due to the different encodings of time between the client and + server. This includes the possibility of race conditions if the + document has changed between the time it was first requested and + the If-Modified-Since date of a subsequent request, and the + + + +Fielding, et al. Standards Track [Page 131] + +RFC 2616 HTTP/1.1 June 1999 + + + possibility of clock-skew-related problems if the If-Modified- + Since date is derived from the client's clock without correction + to the server's clock. Corrections for different time bases + between client and server are at best approximate due to network + latency. + + The result of a request having both an If-Modified-Since header field + and either an If-Match or an If-Unmodified-Since header fields is + undefined by this specification. + +14.26 If-None-Match + + The If-None-Match request-header field is used with a method to make + it conditional. A client that has one or more entities previously + obtained from the resource can verify that none of those entities is + current by including a list of their associated entity tags in the + If-None-Match header field. The purpose of this feature is to allow + efficient updates of cached information with a minimum amount of + transaction overhead. It is also used to prevent a method (e.g. PUT) + from inadvertently modifying an existing resource when the client + believes that the resource does not exist. 
+ + As a special case, the value "*" matches any current entity of the + resource. + + If-None-Match = "If-None-Match" ":" ( "*" | 1#entity-tag ) + + If any of the entity tags match the entity tag of the entity that + would have been returned in the response to a similar GET request + (without the If-None-Match header) on that resource, or if "*" is + given and any current entity exists for that resource, then the + server MUST NOT perform the requested method, unless required to do + so because the resource's modification date fails to match that + supplied in an If-Modified-Since header field in the request. + Instead, if the request method was GET or HEAD, the server SHOULD + respond with a 304 (Not Modified) response, including the cache- + related header fields (particularly ETag) of one of the entities that + matched. For all other request methods, the server MUST respond with + a status of 412 (Precondition Failed). + + See section 13.3.3 for rules on how to determine if two entity tags + match. The weak comparison function can only be used with GET or HEAD + requests. + + + + + + + + +Fielding, et al. Standards Track [Page 132] + +RFC 2616 HTTP/1.1 June 1999 + + + If none of the entity tags match, then the server MAY perform the + requested method as if the If-None-Match header field did not exist, + but MUST also ignore any If-Modified-Since header field(s) in the + request. That is, if no entity tags match, then the server MUST NOT + return a 304 (Not Modified) response. + + If the request would, without the If-None-Match header field, result + in anything other than a 2xx or 304 status, then the If-None-Match + header MUST be ignored. (See section 13.3.4 for a discussion of + server behavior when both If-Modified-Since and If-None-Match appear + in the same request.)
+ + The meaning of "If-None-Match: *" is that the method MUST NOT be + performed if the representation selected by the origin server (or by + a cache, possibly using the Vary mechanism, see section 14.44) + exists, and SHOULD be performed if the representation does not exist. + This feature is intended to be useful in preventing races between PUT + operations. + + Examples: + + If-None-Match: "xyzzy" + If-None-Match: W/"xyzzy" + If-None-Match: "xyzzy", "r2d2xxxx", "c3piozzzz" + If-None-Match: W/"xyzzy", W/"r2d2xxxx", W/"c3piozzzz" + If-None-Match: * + + The result of a request having both an If-None-Match header field and + either an If-Match or an If-Unmodified-Since header fields is + undefined by this specification. + +14.27 If-Range + + If a client has a partial copy of an entity in its cache, and wishes + to have an up-to-date copy of the entire entity in its cache, it + could use the Range request-header with a conditional GET (using + either or both of If-Unmodified-Since and If-Match.) However, if the + condition fails because the entity has been modified, the client + would then have to make a second request to obtain the entire current + entity-body. + + The If-Range header allows a client to "short-circuit" the second + request. Informally, its meaning is `if the entity is unchanged, send + me the part(s) that I am missing; otherwise, send me the entire new + entity'. + + If-Range = "If-Range" ":" ( entity-tag | HTTP-date ) + + + + +Fielding, et al. Standards Track [Page 133] + +RFC 2616 HTTP/1.1 June 1999 + + + If the client has no entity tag for an entity, but does have a Last- + Modified date, it MAY use that date in an If-Range header. (The + server can distinguish between a valid HTTP-date and any form of + entity-tag by examining no more than two characters.) 
The If-Range + header SHOULD only be used together with a Range header, and MUST be + ignored if the request does not include a Range header, or if the + server does not support the sub-range operation. + + If the entity tag given in the If-Range header matches the current + entity tag for the entity, then the server SHOULD provide the + specified sub-range of the entity using a 206 (Partial content) + response. If the entity tag does not match, then the server SHOULD + return the entire entity using a 200 (OK) response. + +14.28 If-Unmodified-Since + + The If-Unmodified-Since request-header field is used with a method to + make it conditional. If the requested resource has not been modified + since the time specified in this field, the server SHOULD perform the + requested operation as if the If-Unmodified-Since header were not + present. + + If the requested variant has been modified since the specified time, + the server MUST NOT perform the requested operation, and MUST return + a 412 (Precondition Failed). + + If-Unmodified-Since = "If-Unmodified-Since" ":" HTTP-date + + An example of the field is: + + If-Unmodified-Since: Sat, 29 Oct 1994 19:43:31 GMT + + If the request normally (i.e., without the If-Unmodified-Since + header) would result in anything other than a 2xx or 412 status, the + If-Unmodified-Since header SHOULD be ignored. + + If the specified date is invalid, the header is ignored. + + The result of a request having both an If-Unmodified-Since header + field and either an If-None-Match or an If-Modified-Since header + fields is undefined by this specification. + +14.29 Last-Modified + + The Last-Modified entity-header field indicates the date and time at + which the origin server believes the variant was last modified. + + Last-Modified = "Last-Modified" ":" HTTP-date + + + +Fielding, et al. 
Standards Track [Page 134] + +RFC 2616 HTTP/1.1 June 1999 + + + An example of its use is + + Last-Modified: Tue, 15 Nov 1994 12:45:26 GMT + + The exact meaning of this header field depends on the implementation + of the origin server and the nature of the original resource. For + files, it may be just the file system last-modified time. For + entities with dynamically included parts, it may be the most recent + of the set of last-modify times for its component parts. For database + gateways, it may be the last-update time stamp of the record. For + virtual objects, it may be the last time the internal state changed. + + An origin server MUST NOT send a Last-Modified date which is later + than the server's time of message origination. In such cases, where + the resource's last modification would indicate some time in the + future, the server MUST replace that date with the message + origination date. + + An origin server SHOULD obtain the Last-Modified value of the entity + as close as possible to the time that it generates the Date value of + its response. This allows a recipient to make an accurate assessment + of the entity's modification time, especially if the entity changes + near the time that the response is generated. + + HTTP/1.1 servers SHOULD send Last-Modified whenever feasible. + +14.30 Location + + The Location response-header field is used to redirect the recipient + to a location other than the Request-URI for completion of the + request or identification of a new resource. For 201 (Created) + responses, the Location is that of the new resource which was created + by the request. For 3xx responses, the location SHOULD indicate the + server's preferred URI for automatic redirection to the resource. The + field value consists of a single absolute URI. 
+ + Location = "Location" ":" absoluteURI + + An example is: + + Location: http://www.w3.org/pub/WWW/People.html + + Note: The Content-Location header field (section 14.14) differs + from Location in that the Content-Location identifies the original + location of the entity enclosed in the request. It is therefore + possible for a response to contain header fields for both Location + and Content-Location. Also see section 13.10 for cache + requirements of some methods. + + + +Fielding, et al. Standards Track [Page 135] + +RFC 2616 HTTP/1.1 June 1999 + + +14.31 Max-Forwards + + The Max-Forwards request-header field provides a mechanism with the + TRACE (section 9.8) and OPTIONS (section 9.2) methods to limit the + number of proxies or gateways that can forward the request to the + next inbound server. This can be useful when the client is attempting + to trace a request chain which appears to be failing or looping in + mid-chain. + + Max-Forwards = "Max-Forwards" ":" 1*DIGIT + + The Max-Forwards value is a decimal integer indicating the remaining + number of times this request message may be forwarded. + + Each proxy or gateway recipient of a TRACE or OPTIONS request + containing a Max-Forwards header field MUST check and update its + value prior to forwarding the request. If the received value is zero + (0), the recipient MUST NOT forward the request; instead, it MUST + respond as the final recipient. If the received Max-Forwards value is + greater than zero, then the forwarded message MUST contain an updated + Max-Forwards field with a value decremented by one (1). + + The Max-Forwards header field MAY be ignored for all other methods + defined by this specification and for any extension methods for which + it is not explicitly referred to as part of that method definition. + +14.32 Pragma + + The Pragma general-header field is used to include implementation- + specific directives that might apply to any recipient along the + request/response chain. 
All pragma directives specify optional + behavior from the viewpoint of the protocol; however, some systems + MAY require that behavior be consistent with the directives. + + Pragma = "Pragma" ":" 1#pragma-directive + pragma-directive = "no-cache" | extension-pragma + extension-pragma = token [ "=" ( token | quoted-string ) ] + + When the no-cache directive is present in a request message, an + application SHOULD forward the request toward the origin server even + if it has a cached copy of what is being requested. This pragma + directive has the same semantics as the no-cache cache-directive (see + section 14.9) and is defined here for backward compatibility with + HTTP/1.0. Clients SHOULD include both header fields when a no-cache + request is sent to a server not known to be HTTP/1.1 compliant. + + + + + + +Fielding, et al. Standards Track [Page 136] + +RFC 2616 HTTP/1.1 June 1999 + + + Pragma directives MUST be passed through by a proxy or gateway + application, regardless of their significance to that application, + since the directives might be applicable to all recipients along the + request/response chain. It is not possible to specify a pragma for a + specific recipient; however, any pragma directive not relevant to a + recipient SHOULD be ignored by that recipient. + + HTTP/1.1 caches SHOULD treat "Pragma: no-cache" as if the client had + sent "Cache-Control: no-cache". No new Pragma directives will be + defined in HTTP. + + Note: because the meaning of "Pragma: no-cache" as a response + header field is not actually specified, it does not provide a + reliable replacement for "Cache-Control: no-cache" in a response. + +14.33 Proxy-Authenticate + + The Proxy-Authenticate response-header field MUST be included as part + of a 407 (Proxy Authentication Required) response. The field value + consists of a challenge that indicates the authentication scheme and + parameters applicable to the proxy for this Request-URI.
+ + Proxy-Authenticate = "Proxy-Authenticate" ":" 1#challenge + + The HTTP access authentication process is described in "HTTP + Authentication: Basic and Digest Access Authentication" [43]. Unlike + WWW-Authenticate, the Proxy-Authenticate header field applies only to + the current connection and SHOULD NOT be passed on to downstream + clients. However, an intermediate proxy might need to obtain its own + credentials by requesting them from the downstream client, which in + some circumstances will appear as if the proxy is forwarding the + Proxy-Authenticate header field. + +14.34 Proxy-Authorization + + The Proxy-Authorization request-header field allows the client to + identify itself (or its user) to a proxy which requires + authentication. The Proxy-Authorization field value consists of + credentials containing the authentication information of the user + agent for the proxy and/or realm of the resource being requested. + + Proxy-Authorization = "Proxy-Authorization" ":" credentials + + The HTTP access authentication process is described in "HTTP + Authentication: Basic and Digest Access Authentication" [43]. Unlike + Authorization, the Proxy-Authorization header field applies only to + the next outbound proxy that demanded authentication using the Proxy- + Authenticate field. When multiple proxies are used in a chain, the + + + +Fielding, et al. Standards Track [Page 137] + +RFC 2616 HTTP/1.1 June 1999 + + + Proxy-Authorization header field is consumed by the first outbound + proxy that was expecting to receive credentials. A proxy MAY relay + the credentials from the client request to the next proxy if that is + the mechanism by which the proxies cooperatively authenticate a given + request. + +14.35 Range + +14.35.1 Byte Ranges + + Since all HTTP entities are represented in HTTP messages as sequences + of bytes, the concept of a byte range is meaningful for any HTTP + entity. (However, not all clients and servers need to support byte- + range operations.)
+ + Byte range specifications in HTTP apply to the sequence of bytes in + the entity-body (not necessarily the same as the message-body). + + A byte range operation MAY specify a single range of bytes, or a set + of ranges within a single entity. + + ranges-specifier = byte-ranges-specifier + byte-ranges-specifier = bytes-unit "=" byte-range-set + byte-range-set = 1#( byte-range-spec | suffix-byte-range-spec ) + byte-range-spec = first-byte-pos "-" [last-byte-pos] + first-byte-pos = 1*DIGIT + last-byte-pos = 1*DIGIT + + The first-byte-pos value in a byte-range-spec gives the byte-offset + of the first byte in a range. The last-byte-pos value gives the + byte-offset of the last byte in the range; that is, the byte + positions specified are inclusive. Byte offsets start at zero. + + If the last-byte-pos value is present, it MUST be greater than or + equal to the first-byte-pos in that byte-range-spec, or the byte- + range-spec is syntactically invalid. The recipient of a byte-range- + set that includes one or more syntactically invalid byte-range-spec + values MUST ignore the header field that includes that byte-range- + set. + + If the last-byte-pos value is absent, or if the value is greater than + or equal to the current length of the entity-body, last-byte-pos is + taken to be equal to one less than the current length of the entity- + body in bytes. + + By its choice of last-byte-pos, a client can limit the number of + bytes retrieved without knowing the size of the entity. + + + + +Fielding, et al. Standards Track [Page 138] + +RFC 2616 HTTP/1.1 June 1999 + + + suffix-byte-range-spec = "-" suffix-length + suffix-length = 1*DIGIT + + A suffix-byte-range-spec is used to specify the suffix of the + entity-body, of a length given by the suffix-length value. (That is, + this form specifies the last N bytes of an entity-body.) If the + entity is shorter than the specified suffix-length, the entire + entity-body is used. 
+ + If a syntactically valid byte-range-set includes at least one byte- + range-spec whose first-byte-pos is less than the current length of + the entity-body, or at least one suffix-byte-range-spec with a non- + zero suffix-length, then the byte-range-set is satisfiable. + Otherwise, the byte-range-set is unsatisfiable. If the byte-range-set + is unsatisfiable, the server SHOULD return a response with a status + of 416 (Requested range not satisfiable). Otherwise, the server + SHOULD return a response with a status of 206 (Partial Content) + containing the satisfiable ranges of the entity-body. + + Examples of byte-ranges-specifier values (assuming an entity-body of + length 10000): + + - The first 500 bytes (byte offsets 0-499, inclusive): bytes=0- + 499 + + - The second 500 bytes (byte offsets 500-999, inclusive): + bytes=500-999 + + - The final 500 bytes (byte offsets 9500-9999, inclusive): + bytes=-500 + + - Or bytes=9500- + + - The first and last bytes only (bytes 0 and 9999): bytes=0-0,-1 + + - Several legal but not canonical specifications of the second 500 + bytes (byte offsets 500-999, inclusive): + bytes=500-600,601-999 + bytes=500-700,601-999 + +14.35.2 Range Retrieval Requests + + HTTP retrieval requests using conditional or unconditional GET + methods MAY request one or more sub-ranges of the entity, instead of + the entire entity, using the Range request header, which applies to + the entity returned as the result of the request: + + Range = "Range" ":" ranges-specifier + + + +Fielding, et al. Standards Track [Page 139] + +RFC 2616 HTTP/1.1 June 1999 + + + A server MAY ignore the Range header. However, HTTP/1.1 origin + servers and intermediate caches ought to support byte ranges when + possible, since Range supports efficient recovery from partially + failed transfers, and supports efficient partial retrieval of large + entities. 
+ + If the server supports the Range header and the specified range or + ranges are appropriate for the entity: + + - The presence of a Range header in an unconditional GET modifies + what is returned if the GET is otherwise successful. In other + words, the response carries a status code of 206 (Partial + Content) instead of 200 (OK). + + - The presence of a Range header in a conditional GET (a request + using one or both of If-Modified-Since and If-None-Match, or + one or both of If-Unmodified-Since and If-Match) modifies what + is returned if the GET is otherwise successful and the + condition is true. It does not affect the 304 (Not Modified) + response returned if the conditional is false. + + In some cases, it might be more appropriate to use the If-Range + header (see section 14.27) in addition to the Range header. + + If a proxy that supports ranges receives a Range request, forwards + the request to an inbound server, and receives an entire entity in + reply, it SHOULD only return the requested range to its client. It + SHOULD store the entire received response in its cache if that is + consistent with its cache allocation policies. + +14.36 Referer + + The Referer[sic] request-header field allows the client to specify, + for the server's benefit, the address (URI) of the resource from + which the Request-URI was obtained (the "referrer", although the + header field is misspelled.) The Referer request-header allows a + server to generate lists of back-links to resources for interest, + logging, optimized caching, etc. It also allows obsolete or mistyped + links to be traced for maintenance. The Referer field MUST NOT be + sent if the Request-URI was obtained from a source that does not have + its own URI, such as input from the user keyboard. + + Referer = "Referer" ":" ( absoluteURI | relativeURI ) + + Example: + + Referer: http://www.w3.org/hypertext/DataSources/Overview.html + + + + +Fielding, et al. 
Standards Track [Page 140] + +RFC 2616 HTTP/1.1 June 1999 + + + If the field value is a relative URI, it SHOULD be interpreted + relative to the Request-URI. The URI MUST NOT include a fragment. See + section 15.1.3 for security considerations. + +14.37 Retry-After + + The Retry-After response-header field can be used with a 503 (Service + Unavailable) response to indicate how long the service is expected to + be unavailable to the requesting client. This field MAY also be used + with any 3xx (Redirection) response to indicate the minimum time the + user-agent is asked to wait before issuing the redirected request. The + value of this field can be either an HTTP-date or an integer number + of seconds (in decimal) after the time of the response. + + Retry-After = "Retry-After" ":" ( HTTP-date | delta-seconds ) + + Two examples of its use are + + Retry-After: Fri, 31 Dec 1999 23:59:59 GMT + Retry-After: 120 + + In the latter example, the delay is 2 minutes. + +14.38 Server + + The Server response-header field contains information about the + software used by the origin server to handle the request. The field + can contain multiple product tokens (section 3.8) and comments + identifying the server and any significant subproducts. The product + tokens are listed in order of their significance for identifying the + application. + + Server = "Server" ":" 1*( product | comment ) + + Example: + + Server: CERN/3.0 libwww/2.17 + + If the response is being forwarded through a proxy, the proxy + application MUST NOT modify the Server response-header. Instead, it + SHOULD include a Via field (as described in section 14.45). + + Note: Revealing the specific software version of the server might + allow the server machine to become more vulnerable to attacks + against software that is known to contain security holes. Server + implementors are encouraged to make this field a configurable + option. + + + + +Fielding, et al. 
Standards Track [Page 141] + +RFC 2616 HTTP/1.1 June 1999 + + +14.39 TE + + The TE request-header field indicates what extension transfer-codings + it is willing to accept in the response and whether or not it is + willing to accept trailer fields in a chunked transfer-coding. Its + value may consist of the keyword "trailers" and/or a comma-separated + list of extension transfer-coding names with optional accept + parameters (as described in section 3.6). + + TE = "TE" ":" #( t-codings ) + t-codings = "trailers" | ( transfer-extension [ accept-params ] ) + + The presence of the keyword "trailers" indicates that the client is + willing to accept trailer fields in a chunked transfer-coding, as + defined in section 3.6.1. This keyword is reserved for use with + transfer-coding values even though it does not itself represent a + transfer-coding. + + Examples of its use are: + + TE: deflate + TE: + TE: trailers, deflate;q=0.5 + + The TE header field only applies to the immediate connection. + Therefore, the keyword MUST be supplied within a Connection header + field (section 14.10) whenever TE is present in an HTTP/1.1 message. + + A server tests whether a transfer-coding is acceptable, according to + a TE field, using these rules: + + 1. The "chunked" transfer-coding is always acceptable. If the + keyword "trailers" is listed, the client indicates that it is + willing to accept trailer fields in the chunked response on + behalf of itself and any downstream clients. The implication is + that, if given, the client is stating that either all + downstream clients are willing to accept trailer fields in the + forwarded response, or that it will attempt to buffer the + response on behalf of downstream recipients. + + Note: HTTP/1.1 does not define any means to limit the size of a + chunked response such that a client can be assured of buffering + the entire response. + + 2. 
If the transfer-coding being tested is one of the transfer- + codings listed in the TE field, then it is acceptable unless it + is accompanied by a qvalue of 0. (As defined in section 3.9, a + qvalue of 0 means "not acceptable.") + + + +Fielding, et al. Standards Track [Page 142] + +RFC 2616 HTTP/1.1 June 1999 + + + 3. If multiple transfer-codings are acceptable, then the + acceptable transfer-coding with the highest non-zero qvalue is + preferred. The "chunked" transfer-coding always has a qvalue + of 1. + + If the TE field-value is empty or if no TE field is present, the only + transfer-coding is "chunked". A message with no transfer-coding is + always acceptable. + +14.40 Trailer + + The Trailer general field value indicates that the given set of + header fields is present in the trailer of a message encoded with + chunked transfer-coding. + + Trailer = "Trailer" ":" 1#field-name + + An HTTP/1.1 message SHOULD include a Trailer header field in a + message using chunked transfer-coding with a non-empty trailer. Doing + so allows the recipient to know which header fields to expect in the + trailer. + + If no Trailer header field is present, the trailer SHOULD NOT include + any header fields. See section 3.6.1 for restrictions on the use of + trailer fields in a "chunked" transfer-coding. + + Message header fields listed in the Trailer header field MUST NOT + include the following header fields: + + . Transfer-Encoding + + . Content-Length + + . Trailer + +14.41 Transfer-Encoding + + The Transfer-Encoding general-header field indicates what (if any) + type of transformation has been applied to the message body in order + to safely transfer it between the sender and the recipient. This + differs from the content-coding in that the transfer-coding is a + property of the message, not of the entity. + + Transfer-Encoding = "Transfer-Encoding" ":" 1#transfer-coding + + Transfer-codings are defined in section 3.6. 
An example is: + + Transfer-Encoding: chunked + + + +Fielding, et al. Standards Track [Page 143] + +RFC 2616 HTTP/1.1 June 1999 + + + If multiple encodings have been applied to an entity, the transfer- + codings MUST be listed in the order in which they were applied. + Additional information about the encoding parameters MAY be provided + by other entity-header fields not defined by this specification. + + Many older HTTP/1.0 applications do not understand the Transfer- + Encoding header. + +14.42 Upgrade + + The Upgrade general-header allows the client to specify what + additional communication protocols it supports and would like to use + if the server finds it appropriate to switch protocols. The server + MUST use the Upgrade header field within a 101 (Switching Protocols) + response to indicate which protocol(s) are being switched. + + Upgrade = "Upgrade" ":" 1#product + + For example, + + Upgrade: HTTP/2.0, SHTTP/1.3, IRC/6.9, RTA/x11 + + The Upgrade header field is intended to provide a simple mechanism + for transition from HTTP/1.1 to some other, incompatible protocol. It + does so by allowing the client to advertise its desire to use another + protocol, such as a later version of HTTP with a higher major version + number, even though the current request has been made using HTTP/1.1. + This eases the difficult transition between incompatible protocols by + allowing the client to initiate a request in the more commonly + supported protocol while indicating to the server that it would like + to use a "better" protocol if available (where "better" is determined + by the server, possibly according to the nature of the method and/or + resource being requested). + + The Upgrade header field only applies to switching application-layer + protocols upon the existing transport-layer connection. Upgrade + cannot be used to insist on a protocol change; its acceptance and use + by the server is optional. 
The capabilities and nature of the + application-layer communication after the protocol change are entirely + dependent upon the new protocol chosen, although the first action + after changing the protocol MUST be a response to the initial HTTP + request containing the Upgrade header field. + + The Upgrade header field only applies to the immediate connection. + Therefore, the upgrade keyword MUST be supplied within a Connection + header field (section 14.10) whenever Upgrade is present in an + HTTP/1.1 message. + + + + +Fielding, et al. Standards Track [Page 144] + +RFC 2616 HTTP/1.1 June 1999 + + + The Upgrade header field cannot be used to indicate a switch to a + protocol on a different connection. For that purpose, it is more + appropriate to use a 301, 302, 303, or 305 redirection response. + + This specification only defines the protocol name "HTTP" for use by + the family of Hypertext Transfer Protocols, as defined by the HTTP + version rules of section 3.1 and future updates to this + specification. Any token can be used as a protocol name; however, it + will only be useful if both the client and server associate the name + with the same protocol. + +14.43 User-Agent + + The User-Agent request-header field contains information about the + user agent originating the request. This is for statistical purposes, + the tracing of protocol violations, and automated recognition of user + agents for the sake of tailoring responses to avoid particular user + agent limitations. User agents SHOULD include this field with + requests. The field can contain multiple product tokens (section 3.8) + and comments identifying the agent and any subproducts which form a + significant part of the user agent. By convention, the product tokens + are listed in order of their significance for identifying the + application. 
+ + User-Agent = "User-Agent" ":" 1*( product | comment ) + + Example: + + User-Agent: CERN-LineMode/2.15 libwww/2.17b3 + +14.44 Vary + + The Vary field value indicates the set of request-header fields that + fully determines, while the response is fresh, whether a cache is + permitted to use the response to reply to a subsequent request + without revalidation. For uncacheable or stale responses, the Vary + field value advises the user agent about the criteria that were used + to select the representation. A Vary field value of "*" implies that + a cache cannot determine from the request headers of a subsequent + request whether this response is the appropriate representation. See + section 13.6 for use of the Vary header field by caches. + + Vary = "Vary" ":" ( "*" | 1#field-name ) + + An HTTP/1.1 server SHOULD include a Vary header field with any + cacheable response that is subject to server-driven negotiation. + Doing so allows a cache to properly interpret future requests on that + resource and informs the user agent about the presence of negotiation + + + +Fielding, et al. Standards Track [Page 145] + +RFC 2616 HTTP/1.1 June 1999 + + + on that resource. A server MAY include a Vary header field with a + non-cacheable response that is subject to server-driven negotiation, + since this might provide the user agent with useful information about + the dimensions over which the response varies at the time of the + response. + + A Vary field value consisting of a list of field-names signals that + the representation selected for the response is based on a selection + algorithm which considers ONLY the listed request-header field values + in selecting the most appropriate representation. A cache MAY assume + that the same selection will be made for future requests with the + same values for the listed field names, for the duration of time for + which the response is fresh. 
+ + The field-names given are not limited to the set of standard + request-header fields defined by this specification. Field names are + case-insensitive. + + A Vary field value of "*" signals that unspecified parameters not + limited to the request-headers (e.g., the network address of the + client), play a role in the selection of the response representation. + The "*" value MUST NOT be generated by a proxy server; it may only be + generated by an origin server. + +14.45 Via + + The Via general-header field MUST be used by gateways and proxies to + indicate the intermediate protocols and recipients between the user + agent and the server on requests, and between the origin server and + the client on responses. It is analogous to the "Received" field of + RFC 822 [9] and is intended to be used for tracking message forwards, + avoiding request loops, and identifying the protocol capabilities of + all senders along the request/response chain. + + Via = "Via" ":" 1#( received-protocol received-by [ comment ] ) + received-protocol = [ protocol-name "/" ] protocol-version + protocol-name = token + protocol-version = token + received-by = ( host [ ":" port ] ) | pseudonym + pseudonym = token + + The received-protocol indicates the protocol version of the message + received by the server or client along each segment of the + request/response chain. The received-protocol version is appended to + the Via field value when the message is forwarded so that information + about the protocol capabilities of upstream applications remains + visible to all recipients. + + + + +Fielding, et al. Standards Track [Page 146] + +RFC 2616 HTTP/1.1 June 1999 + + + The protocol-name is optional if and only if it would be "HTTP". The + received-by field is normally the host and optional port number of a + recipient server or client that subsequently forwarded the message. + However, if the real host is considered to be sensitive information, + it MAY be replaced by a pseudonym. 
If the port is not given, it MAY + be assumed to be the default port of the received-protocol. + + Multiple Via field values represent each proxy or gateway that has + forwarded the message. Each recipient MUST append its information + such that the end result is ordered according to the sequence of + forwarding applications. + + Comments MAY be used in the Via header field to identify the software + of the recipient proxy or gateway, analogous to the User-Agent and + Server header fields. However, all comments in the Via field are + optional and MAY be removed by any recipient prior to forwarding the + message. + + For example, a request message could be sent from an HTTP/1.0 user + agent to an internal proxy code-named "fred", which uses HTTP/1.1 to + forward the request to a public proxy at nowhere.com, which completes + the request by forwarding it to the origin server at www.ics.uci.edu. + The request received by www.ics.uci.edu would then have the following + Via header field: + + Via: 1.0 fred, 1.1 nowhere.com (Apache/1.1) + + Proxies and gateways used as a portal through a network firewall + SHOULD NOT, by default, forward the names and ports of hosts within + the firewall region. This information SHOULD only be propagated if + explicitly enabled. If not enabled, the received-by host of any host + behind the firewall SHOULD be replaced by an appropriate pseudonym + for that host. + + For organizations that have strong privacy requirements for hiding + internal structures, a proxy MAY combine an ordered subsequence of + Via header field entries with identical received-protocol values into + a single such entry. For example, + + Via: 1.0 ricky, 1.1 ethel, 1.1 fred, 1.0 lucy + + could be collapsed to + + Via: 1.0 ricky, 1.1 mertz, 1.0 lucy + + + + + + + +Fielding, et al. 
Standards Track [Page 147] + +RFC 2616 HTTP/1.1 June 1999 + + + Applications SHOULD NOT combine multiple entries unless they are all + under the same organizational control and the hosts have already been + replaced by pseudonyms. Applications MUST NOT combine entries which + have different received-protocol values. + +14.46 Warning + + The Warning general-header field is used to carry additional + information about the status or transformation of a message which + might not be reflected in the message. This information is typically + used to warn about a possible lack of semantic transparency from + caching operations or transformations applied to the entity body of + the message. + + Warning headers are sent with responses using: + + Warning = "Warning" ":" 1#warning-value + + warning-value = warn-code SP warn-agent SP warn-text + [SP warn-date] + + warn-code = 3DIGIT + warn-agent = ( host [ ":" port ] ) | pseudonym + ; the name or pseudonym of the server adding + ; the Warning header, for use in debugging + warn-text = quoted-string + warn-date = <"> HTTP-date <"> + + A response MAY carry more than one Warning header. + + The warn-text SHOULD be in a natural language and character set that + is most likely to be intelligible to the human user receiving the + response. This decision MAY be based on any available knowledge, such + as the location of the cache or user, the Accept-Language field in a + request, the Content-Language field in a response, etc. The default + language is English and the default character set is ISO-8859-1. + + If a character set other than ISO-8859-1 is used, it MUST be encoded + in the warn-text using the method described in RFC 2047 [14]. + + Warning headers can in general be applied to any message, however + some specific warn-codes are specific to caches and can only be + applied to response messages. New Warning headers SHOULD be added + after any existing Warning headers. 
A cache MUST NOT delete any + Warning header that it received with a message. However, if a cache + successfully validates a cache entry, it SHOULD remove any Warning + headers previously attached to that entry except as specified for + + + + +Fielding, et al. Standards Track [Page 148] + +RFC 2616 HTTP/1.1 June 1999 + + + specific Warning codes. It MUST then add any Warning headers received + in the validating response. In other words, Warning headers are those + that would be attached to the most recent relevant response. + + When multiple Warning headers are attached to a response, the user + agent ought to inform the user of as many of them as possible, in the + order that they appear in the response. If it is not possible to + inform the user of all of the warnings, the user agent SHOULD follow + these heuristics: + + - Warnings that appear early in the response take priority over + those appearing later in the response. + + - Warnings in the user's preferred character set take priority + over warnings in other character sets but with identical warn- + codes and warn-agents. + + Systems that generate multiple Warning headers SHOULD order them with + this user agent behavior in mind. + + Requirements for the behavior of caches with respect to Warnings are + stated in section 13.1.2. + + This is a list of the currently-defined warn-codes, each with a + recommended warn-text in English, and a description of its meaning. + + 110 Response is stale + MUST be included whenever the returned response is stale. + + 111 Revalidation failed + MUST be included if a cache returns a stale response because an + attempt to revalidate the response failed, due to an inability to + reach the server. + + 112 Disconnected operation + SHOULD be included if the cache is intentionally disconnected from + the rest of the network for a period of time. 
+ + 113 Heuristic expiration + MUST be included if the cache heuristically chose a freshness + lifetime greater than 24 hours and the response's age is greater + than 24 hours. + + 199 Miscellaneous warning + The warning text MAY include arbitrary information to be presented + to a human user, or logged. A system receiving this warning MUST + NOT take any automated action, besides presenting the warning to + the user. + + + +Fielding, et al. Standards Track [Page 149] + +RFC 2616 HTTP/1.1 June 1999 + + + 214 Transformation applied + MUST be added by an intermediate cache or proxy if it applies any + transformation changing the content-coding (as specified in the + Content-Encoding header) or media-type (as specified in the + Content-Type header) of the response, or the entity-body of the + response, unless this Warning code already appears in the response. + + 299 Miscellaneous persistent warning + The warning text MAY include arbitrary information to be presented + to a human user, or logged. A system receiving this warning MUST + NOT take any automated action. + + If an implementation sends a message with one or more Warning headers + whose version is HTTP/1.0 or lower, then the sender MUST include in + each warning-value a warn-date that matches the date in the response. + + If an implementation receives a message with a warning-value that + includes a warn-date, and that warn-date is different from the Date + value in the response, then that warning-value MUST be deleted from + the message before storing, forwarding, or using it. (This prevents + bad consequences of naive caching of Warning header fields.) If all + of the warning-values are deleted for this reason, the Warning header + MUST be deleted as well. + +14.47 WWW-Authenticate + + The WWW-Authenticate response-header field MUST be included in 401 + (Unauthorized) response messages. 
The field value consists of at + least one challenge that indicates the authentication scheme(s) and + parameters applicable to the Request-URI. + + WWW-Authenticate = "WWW-Authenticate" ":" 1#challenge + + The HTTP access authentication process is described in "HTTP + Authentication: Basic and Digest Access Authentication" [43]. User + agents are advised to take special care in parsing the WWW- + Authenticate field value as it might contain more than one challenge, + or if more than one WWW-Authenticate header field is provided, the + contents of a challenge itself can contain a comma-separated list of + authentication parameters. + +15 Security Considerations + + This section is meant to inform application developers, information + providers, and users of the security limitations in HTTP/1.1 as + described by this document. The discussion does not include + definitive solutions to the problems revealed, though it does make + some suggestions for reducing security risks. + + + +Fielding, et al. Standards Track [Page 150] + +RFC 2616 HTTP/1.1 June 1999 + + +15.1 Personal Information + + HTTP clients are often privy to large amounts of personal information + (e.g. the user's name, location, mail address, passwords, encryption + keys, etc.), and SHOULD be very careful to prevent unintentional + leakage of this information via the HTTP protocol to other sources. + We very strongly recommend that a convenient interface be provided + for the user to control dissemination of such information, and that + designers and implementors be particularly careful in this area. + History shows that errors in this area often create serious security + and/or privacy problems and generate highly adverse publicity for the + implementor's company. + +15.1.1 Abuse of Server Log Information + + A server is in the position to save personal data about a user's + requests which might identify their reading patterns or subjects of + interest. 
This information is clearly confidential in nature and its + handling can be constrained by law in certain countries. People using + the HTTP protocol to provide data are responsible for ensuring that + such material is not distributed without the permission of any + individuals that are identifiable by the published results. + +15.1.2 Transfer of Sensitive Information + + Like any generic data transfer protocol, HTTP cannot regulate the + content of the data that is transferred, nor is there any a priori + method of determining the sensitivity of any particular piece of + information within the context of any given request. Therefore, + applications SHOULD supply as much control over this information as + possible to the provider of that information. Four header fields are + worth special mention in this context: Server, Via, Referer and From. + + Revealing the specific software version of the server might allow the + server machine to become more vulnerable to attacks against software + that is known to contain security holes. Implementors SHOULD make the + Server header field a configurable option. + + Proxies which serve as a portal through a network firewall SHOULD + take special precautions regarding the transfer of header information + that identifies the hosts behind the firewall. In particular, they + SHOULD remove, or replace with sanitized versions, any Via fields + generated behind the firewall. + + The Referer header allows reading patterns to be studied and reverse + links drawn. Although it can be very useful, its power can be abused + if user details are not separated from the information contained in + + + + +Fielding, et al. Standards Track [Page 151] + +RFC 2616 HTTP/1.1 June 1999 + + + the Referer. Even when the personal information has been removed, the + Referer header might indicate a private document's URI whose + publication would be inappropriate. 
+ + The information sent in the From field might conflict with the user's + privacy interests or their site's security policy, and hence it + SHOULD NOT be transmitted without the user being able to disable, + enable, and modify the contents of the field. The user MUST be able + to set the contents of this field within a user preference or + application defaults configuration. + + We suggest, though do not require, that a convenient toggle interface + be provided for the user to enable or disable the sending of From and + Referer information. + + The User-Agent (section 14.43) or Server (section 14.38) header + fields can sometimes be used to determine that a specific client or + server has a particular security hole which might be exploited. + Unfortunately, this same information is often used for other valuable + purposes for which HTTP currently has no better mechanism. + +15.1.3 Encoding Sensitive Information in URI's + + Because the source of a link might be private information or might + reveal an otherwise private information source, it is strongly + recommended that the user be able to select whether or not the + Referer field is sent. For example, a browser client could have a + toggle switch for browsing openly/anonymously, which would + respectively enable/disable the sending of Referer and From + information. + + Clients SHOULD NOT include a Referer header field in a (non-secure) + HTTP request if the referring page was transferred with a secure + protocol. + + Authors of services which use the HTTP protocol SHOULD NOT use GET + based forms for the submission of sensitive data, because this will + cause this data to be encoded in the Request-URI. Many existing + servers, proxies, and user agents will log the request URI in some + place where it might be visible to third parties. 
Servers can use + POST-based form submission instead. + +15.1.4 Privacy Issues Connected to Accept Headers + + Accept request-headers can reveal information about the user to all + servers which are accessed. The Accept-Language header in particular + can reveal information the user would consider to be of a private + nature, because the understanding of particular languages is often + + + +Fielding, et al. Standards Track [Page 152] + +RFC 2616 HTTP/1.1 June 1999 + + + strongly correlated to the membership of a particular ethnic group. + User agents which offer the option to configure the contents of an + Accept-Language header to be sent in every request are strongly + encouraged to let the configuration process include a message which + makes the user aware of the loss of privacy involved. + + An approach that limits the loss of privacy would be for a user agent + to omit the sending of Accept-Language headers by default, and to ask + the user whether or not to start sending Accept-Language headers to a + server if it detects, by looking for any Vary response-header fields + generated by the server, that such sending could improve the quality + of service. + + Elaborate user-customized accept header fields sent in every request, + in particular if these include quality values, can be used by servers + as relatively reliable and long-lived user identifiers. Such user + identifiers would allow content providers to do click-trail tracking, + and would allow collaborating content providers to match cross-server + click-trails or form submissions of individual users. Note that for + many users not behind a proxy, the network address of the host + running the user agent will also serve as a long-lived user + identifier. In environments where proxies are used to enhance + privacy, user agents ought to be conservative in offering accept + header configuration options to end users. 
As an extreme privacy + measure, proxies could filter the accept headers in relayed requests. + General purpose user agents which provide a high degree of header + configurability SHOULD warn users about the loss of privacy which can + be involved. + +15.2 Attacks Based On File and Path Names + + Implementations of HTTP origin servers SHOULD be careful to restrict + the documents returned by HTTP requests to be only those that were + intended by the server administrators. If an HTTP server translates + HTTP URIs directly into file system calls, the server MUST take + special care not to serve files that were not intended to be + delivered to HTTP clients. For example, UNIX, Microsoft Windows, and + other operating systems use ".." as a path component to indicate a + directory level above the current one. On such a system, an HTTP + server MUST disallow any such construct in the Request-URI if it + would otherwise allow access to a resource outside those intended to + be accessible via the HTTP server. Similarly, files intended for + reference only internally to the server (such as access control + files, configuration files, and script code) MUST be protected from + inappropriate retrieval, since they might contain sensitive + information. Experience has shown that minor bugs in such HTTP server + implementations have turned into security risks. + + + + +Fielding, et al. Standards Track [Page 153] + +RFC 2616 HTTP/1.1 June 1999 + + +15.3 DNS Spoofing + + Clients using HTTP rely heavily on the Domain Name Service, and are + thus generally prone to security attacks based on the deliberate + mis-association of IP addresses and DNS names. Clients need to be + cautious in assuming the continuing validity of an IP number/DNS name + association. + + In particular, HTTP clients SHOULD rely on their name resolver for + confirmation of an IP number/DNS name association, rather than + caching the result of previous host name lookups. 
Many platforms + already can cache host name lookups locally when appropriate, and + they SHOULD be configured to do so. It is proper for these lookups to + be cached, however, only when the TTL (Time To Live) information + reported by the name server makes it likely that the cached + information will remain useful. + + If HTTP clients cache the results of host name lookups in order to + achieve a performance improvement, they MUST observe the TTL + information reported by DNS. + + If HTTP clients do not observe this rule, they could be spoofed when + a previously-accessed server's IP address changes. As network + renumbering is expected to become increasingly common [24], the + possibility of this form of attack will grow. Observing this + requirement thus reduces this potential security vulnerability. + + This requirement also improves the load-balancing behavior of clients + for replicated servers using the same DNS name and reduces the + likelihood of a user's experiencing failure in accessing sites which + use that strategy. + +15.4 Location Headers and Spoofing + + If a single server supports multiple organizations that do not trust + one another, then it MUST check the values of Location and Content- + Location headers in responses that are generated under control of + said organizations to make sure that they do not attempt to + invalidate resources over which they have no authority. + +15.5 Content-Disposition Issues + + RFC 1806 [35], from which the often implemented Content-Disposition + (see section 19.5.1) header in HTTP is derived, has a number of very + serious security considerations. Content-Disposition is not part of + the HTTP standard, but since it is widely implemented, we are + documenting its use and risks for implementors. See RFC 2183 [49] + (which updates RFC 1806) for details. + + + +Fielding, et al. 
Standards Track [Page 154] + +RFC 2616 HTTP/1.1 June 1999 + + +15.6 Authentication Credentials and Idle Clients + + Existing HTTP clients and user agents typically retain authentication + information indefinitely. HTTP/1.1. does not provide a method for a + server to direct clients to discard these cached credentials. This is + a significant defect that requires further extensions to HTTP. + Circumstances under which credential caching can interfere with the + application's security model include but are not limited to: + + - Clients which have been idle for an extended period following + which the server might wish to cause the client to reprompt the + user for credentials. + + - Applications which include a session termination indication + (such as a `logout' or `commit' button on a page) after which + the server side of the application `knows' that there is no + further reason for the client to retain the credentials. + + This is currently under separate study. There are a number of work- + arounds to parts of this problem, and we encourage the use of + password protection in screen savers, idle time-outs, and other + methods which mitigate the security problems inherent in this + problem. In particular, user agents which cache credentials are + encouraged to provide a readily accessible mechanism for discarding + cached credentials under user control. + +15.7 Proxies and Caching + + By their very nature, HTTP proxies are men-in-the-middle, and + represent an opportunity for man-in-the-middle attacks. Compromise of + the systems on which the proxies run can result in serious security + and privacy problems. Proxies have access to security-related + information, personal information about individual users and + organizations, and proprietary information belonging to users and + content providers. 
A compromised proxy, or a proxy implemented or + configured without regard to security and privacy considerations, + might be used in the commission of a wide range of potential attacks. + + Proxy operators should protect the systems on which proxies run as + they would protect any system that contains or transports sensitive + information. In particular, log information gathered at proxies often + contains highly sensitive personal information, and/or information + about organizations. Log information should be carefully guarded, and + appropriate guidelines for use developed and followed. (Section + 15.1.1). + + + + + + +Fielding, et al. Standards Track [Page 155] + +RFC 2616 HTTP/1.1 June 1999 + + + Caching proxies provide additional potential vulnerabilities, since + the contents of the cache represent an attractive target for + malicious exploitation. Because cache contents persist after an HTTP + request is complete, an attack on the cache can reveal information + long after a user believes that the information has been removed from + the network. Therefore, cache contents should be protected as + sensitive information. + + Proxy implementors should consider the privacy and security + implications of their design and coding decisions, and of the + configuration options they provide to proxy operators (especially the + default configuration). + + Users of a proxy need to be aware that they are no trustworthier than + the people who run the proxy; HTTP itself cannot solve this problem. + + The judicious use of cryptography, when appropriate, may suffice to + protect against a broad range of security and privacy attacks. Such + cryptography is beyond the scope of the HTTP/1.1 specification. + +15.7.1 Denial of Service Attacks on Proxies + + They exist. They are hard to defend against. Research continues. + Beware. + +16 Acknowledgments + + This specification makes heavy use of the augmented BNF and generic + constructs defined by David H. 
Crocker for RFC 822 [9]. Similarly, it + reuses many of the definitions provided by Nathaniel Borenstein and + Ned Freed for MIME [7]. We hope that their inclusion in this + specification will help reduce past confusion over the relationship + between HTTP and Internet mail message formats. + + The HTTP protocol has evolved considerably over the years. It has + benefited from a large and active developer community--the many + people who have participated on the www-talk mailing list--and it is + that community which has been most responsible for the success of + HTTP and of the World-Wide Web in general. Marc Andreessen, Robert + Cailliau, Daniel W. Connolly, Bob Denny, John Franks, Jean-Francois + Groff, Phillip M. Hallam-Baker, Hakon W. Lie, Ari Luotonen, Rob + McCool, Lou Montulli, Dave Raggett, Tony Sanders, and Marc + VanHeyningen deserve special recognition for their efforts in + defining early aspects of the protocol. + + This document has benefited greatly from the comments of all those + participating in the HTTP-WG. In addition to those already mentioned, + the following individuals have contributed to this specification: + + + +Fielding, et al. Standards Track [Page 156] + +RFC 2616 HTTP/1.1 June 1999 + + + Gary Adams Ross Patterson + Harald Tveit Alvestrand Albert Lunde + Keith Ball John C. Mallery + Brian Behlendorf Jean-Philippe Martin-Flatin + Paul Burchard Mitra + Maurizio Codogno David Morris + Mike Cowlishaw Gavin Nicol + Roman Czyborra Bill Perry + Michael A. Dolan Jeffrey Perry + David J. Fiander Scott Powers + Alan Freier Owen Rees + Marc Hedlund Luigi Rizzo + Greg Herlihy David Robinson + Koen Holtman Marc Salomon + Alex Hopmann Rich Salz + Bob Jernigan Allan M. Schiffman + Shel Kaphan Jim Seidman + Rohit Khare Chuck Shotton + John Klensin Eric W. Sink + Martijn Koster Simon E. Spero + Alexei Kosut Richard N. Taylor + David M. Kristol Robert S. Thau + Daniel LaLiberte Bill (BearHeart) Weinman + Ben Laurie Francois Yergeau + Paul J. 
Leach Mary Ellen Zurko + Daniel DuBois Josh Cohen + + + Much of the content and presentation of the caching design is due to + suggestions and comments from individuals including: Shel Kaphan, + Paul Leach, Koen Holtman, David Morris, and Larry Masinter. + + Most of the specification of ranges is based on work originally done + by Ari Luotonen and John Franks, with additional input from Steve + Zilles. + + Thanks to the "cave men" of Palo Alto. You know who you are. + + Jim Gettys (the current editor of this document) wishes particularly + to thank Roy Fielding, the previous editor of this document, along + with John Klensin, Jeff Mogul, Paul Leach, Dave Kristol, Koen + Holtman, John Franks, Josh Cohen, Alex Hopmann, Scott Lawrence, and + Larry Masinter for their help. And thanks go particularly to Jeff + Mogul and Scott Lawrence for performing the "MUST/MAY/SHOULD" audit. + + + + + + + +Fielding, et al. Standards Track [Page 157] + +RFC 2616 HTTP/1.1 June 1999 + + + The Apache Group, Anselm Baird-Smith, author of Jigsaw, and Henrik + Frystyk implemented RFC 2068 early, and we wish to thank them for the + discovery of many of the problems that this document attempts to + rectify. + +17 References + + [1] Alvestrand, H., "Tags for the Identification of Languages", RFC + 1766, March 1995. + + [2] Anklesaria, F., McCahill, M., Lindner, P., Johnson, D., Torrey, + D. and B. Alberti, "The Internet Gopher Protocol (a distributed + document search and retrieval protocol)", RFC 1436, March 1993. + + [3] Berners-Lee, T., "Universal Resource Identifiers in WWW", RFC + 1630, June 1994. + + [4] Berners-Lee, T., Masinter, L. and M. McCahill, "Uniform Resource + Locators (URL)", RFC 1738, December 1994. + + [5] Berners-Lee, T. and D. Connolly, "Hypertext Markup Language - + 2.0", RFC 1866, November 1995. + + [6] Berners-Lee, T., Fielding, R. and H. Frystyk, "Hypertext Transfer + Protocol -- HTTP/1.0", RFC 1945, May 1996. + + [7] Freed, N. and N. 
Borenstein, "Multipurpose Internet Mail + Extensions (MIME) Part One: Format of Internet Message Bodies", + RFC 2045, November 1996. + + [8] Braden, R., "Requirements for Internet Hosts -- Communication + Layers", STD 3, RFC 1123, October 1989. + + [9] Crocker, D., "Standard for The Format of ARPA Internet Text + Messages", STD 11, RFC 822, August 1982. + + [10] Davis, F., Kahle, B., Morris, H., Salem, J., Shen, T., Wang, R., + Sui, J., and M. Grinbaum, "WAIS Interface Protocol Prototype + Functional Specification," (v1.5), Thinking Machines + Corporation, April 1990. + + [11] Fielding, R., "Relative Uniform Resource Locators", RFC 1808, + June 1995. + + [12] Horton, M. and R. Adams, "Standard for Interchange of USENET + Messages", RFC 1036, December 1987. + + + + + +Fielding, et al. Standards Track [Page 158] + +RFC 2616 HTTP/1.1 June 1999 + + + [13] Kantor, B. and P. Lapsley, "Network News Transfer Protocol", RFC + 977, February 1986. + + [14] Moore, K., "MIME (Multipurpose Internet Mail Extensions) Part + Three: Message Header Extensions for Non-ASCII Text", RFC 2047, + November 1996. + + [15] Nebel, E. and L. Masinter, "Form-based File Upload in HTML", RFC + 1867, November 1995. + + [16] Postel, J., "Simple Mail Transfer Protocol", STD 10, RFC 821, + August 1982. + + [17] Postel, J., "Media Type Registration Procedure", RFC 1590, + November 1996. + + [18] Postel, J. and J. Reynolds, "File Transfer Protocol", STD 9, RFC + 959, October 1985. + + [19] Reynolds, J. and J. Postel, "Assigned Numbers", STD 2, RFC 1700, + October 1994. + + [20] Sollins, K. and L. Masinter, "Functional Requirements for + Uniform Resource Names", RFC 1737, December 1994. + + [21] US-ASCII. Coded Character Set - 7-Bit American Standard Code for + Information Interchange. Standard ANSI X3.4-1986, ANSI, 1986. + + [22] ISO-8859. International Standard -- Information Processing -- + 8-bit Single-Byte Coded Graphic Character Sets -- + Part 1: Latin alphabet No. 1, ISO-8859-1:1987. 
+ Part 2: Latin alphabet No. 2, ISO-8859-2, 1987. + Part 3: Latin alphabet No. 3, ISO-8859-3, 1988. + Part 4: Latin alphabet No. 4, ISO-8859-4, 1988. + Part 5: Latin/Cyrillic alphabet, ISO-8859-5, 1988. + Part 6: Latin/Arabic alphabet, ISO-8859-6, 1987. + Part 7: Latin/Greek alphabet, ISO-8859-7, 1987. + Part 8: Latin/Hebrew alphabet, ISO-8859-8, 1988. + Part 9: Latin alphabet No. 5, ISO-8859-9, 1990. + + [23] Meyers, J. and M. Rose, "The Content-MD5 Header Field", RFC + 1864, October 1995. + + [24] Carpenter, B. and Y. Rekhter, "Renumbering Needs Work", RFC + 1900, February 1996. + + [25] Deutsch, P., "GZIP file format specification version 4.3", RFC + 1952, May 1996. + + + +Fielding, et al. Standards Track [Page 159] + +RFC 2616 HTTP/1.1 June 1999 + + + [26] Venkata N. Padmanabhan, and Jeffrey C. Mogul. "Improving HTTP + Latency", Computer Networks and ISDN Systems, v. 28, pp. 25-35, + Dec. 1995. Slightly revised version of paper in Proc. 2nd + International WWW Conference '94: Mosaic and the Web, Oct. 1994, + which is available at + http://www.ncsa.uiuc.edu/SDG/IT94/Proceedings/DDay/mogul/HTTPLat + ency.html. + + [27] Joe Touch, John Heidemann, and Katia Obraczka. "Analysis of HTTP + Performance", <URL: http://www.isi.edu/touch/pubs/http-perf96/>, + ISI Research Report ISI/RR-98-463, (original report dated Aug. + 1996), USC/Information Sciences Institute, August 1998. + + [28] Mills, D., "Network Time Protocol (Version 3) Specification, + Implementation and Analysis", RFC 1305, March 1992. + + [29] Deutsch, P., "DEFLATE Compressed Data Format Specification + version 1.3", RFC 1951, May 1996. + + [30] S. Spero, "Analysis of HTTP Performance Problems," + http://sunsite.unc.edu/mdma-release/http-prob.html. + + [31] Deutsch, P. and J. Gailly, "ZLIB Compressed Data Format + Specification version 3.3", RFC 1950, May 1996. + + [32] Franks, J., Hallam-Baker, P., Hostetler, J., Leach, P., + Luotonen, A., Sink, E. and L. Stewart, "An Extension to HTTP: + Digest Access Authentication", RFC 2069, January 1997.
+ + [33] Fielding, R., Gettys, J., Mogul, J., Frystyk, H. and T. + Berners-Lee, "Hypertext Transfer Protocol -- HTTP/1.1", RFC + 2068, January 1997. + + [34] Bradner, S., "Key words for use in RFCs to Indicate Requirement + Levels", BCP 14, RFC 2119, March 1997. + + [35] Troost, R. and Dorner, S., "Communicating Presentation + Information in Internet Messages: The Content-Disposition + Header", RFC 1806, June 1995. + + [36] Mogul, J., Fielding, R., Gettys, J. and H. Frystyk, "Use and + Interpretation of HTTP Version Numbers", RFC 2145, May 1997. + [jg639] + + [37] Palme, J., "Common Internet Message Headers", RFC 2076, February + 1997. [jg640] + + + + + +Fielding, et al. Standards Track [Page 160] + +RFC 2616 HTTP/1.1 June 1999 + + + [38] Yergeau, F., "UTF-8, a transformation format of Unicode and + ISO-10646", RFC 2279, January 1998. [jg641] + + [39] Nielsen, H.F., Gettys, J., Baird-Smith, A., Prud'hommeaux, E., + Lie, H., and C. Lilley. "Network Performance Effects of + HTTP/1.1, CSS1, and PNG," Proceedings of ACM SIGCOMM '97, Cannes + France, September 1997.[jg642] + + [40] Freed, N. and N. Borenstein, "Multipurpose Internet Mail + Extensions (MIME) Part Two: Media Types", RFC 2046, November + 1996. [jg643] + + [41] Alvestrand, H., "IETF Policy on Character Sets and Languages", + BCP 18, RFC 2277, January 1998. [jg644] + + [42] Berners-Lee, T., Fielding, R. and L. Masinter, "Uniform Resource + Identifiers (URI): Generic Syntax and Semantics", RFC 2396, + August 1998. [jg645] + + [43] Franks, J., Hallam-Baker, P., Hostetler, J., Lawrence, S., + Leach, P., Luotonen, A., Sink, E. and L. Stewart, "HTTP + Authentication: Basic and Digest Access Authentication", RFC + 2617, June 1999. [jg646] + + [44] Luotonen, A., "Tunneling TCP based protocols through Web proxy + servers," Work in Progress. [jg647] + + [45] Palme, J. and A. Hopmann, "MIME E-mail Encapsulation of + Aggregate Documents, such as HTML (MHTML)", RFC 2110, March + 1997. 
+ + [46] Bradner, S., "The Internet Standards Process -- Revision 3", BCP + 9, RFC 2026, October 1996. + + [47] Masinter, L., "Hyper Text Coffee Pot Control Protocol + (HTCPCP/1.0)", RFC 2324, 1 April 1998. + + [48] Freed, N. and N. Borenstein, "Multipurpose Internet Mail + Extensions (MIME) Part Five: Conformance Criteria and Examples", + RFC 2049, November 1996. + + [49] Troost, R., Dorner, S. and K. Moore, "Communicating Presentation + Information in Internet Messages: The Content-Disposition Header + Field", RFC 2183, August 1997. + + + + + + + +Fielding, et al. Standards Track [Page 161] + +RFC 2616 HTTP/1.1 June 1999 + + +18 Authors' Addresses + + Roy T. Fielding + Information and Computer Science + University of California, Irvine + Irvine, CA 92697-3425, USA + + Fax: +1 (949) 824-1715 + EMail: fielding@ics.uci.edu + + + James Gettys + World Wide Web Consortium + MIT Laboratory for Computer Science + 545 Technology Square + Cambridge, MA 02139, USA + + Fax: +1 (617) 258 8682 + EMail: jg@w3.org + + + Jeffrey C. Mogul + Western Research Laboratory + Compaq Computer Corporation + 250 University Avenue + Palo Alto, California, 94305, USA + + EMail: mogul@wrl.dec.com + + + Henrik Frystyk Nielsen + World Wide Web Consortium + MIT Laboratory for Computer Science + 545 Technology Square + Cambridge, MA 02139, USA + + Fax: +1 (617) 258 8682 + EMail: frystyk@w3.org + + + Larry Masinter + Xerox Corporation + 3333 Coyote Hill Road + Palo Alto, CA 94034, USA + + EMail: masinter@parc.xerox.com + + + + + +Fielding, et al. Standards Track [Page 162] + +RFC 2616 HTTP/1.1 June 1999 + + + Paul J. 
Leach + Microsoft Corporation + 1 Microsoft Way + Redmond, WA 98052, USA + + EMail: paulle@microsoft.com + + + Tim Berners-Lee + Director, World Wide Web Consortium + MIT Laboratory for Computer Science + 545 Technology Square + Cambridge, MA 02139, USA + + Fax: +1 (617) 258 8682 + EMail: timbl@w3.org + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Fielding, et al. Standards Track [Page 163] + +RFC 2616 HTTP/1.1 June 1999 + + +19 Appendices + +19.1 Internet Media Type message/http and application/http + + In addition to defining the HTTP/1.1 protocol, this document serves + as the specification for the Internet media type "message/http" and + "application/http". The message/http type can be used to enclose a + single HTTP request or response message, provided that it obeys the + MIME restrictions for all "message" types regarding line length and + encodings. The application/http type can be used to enclose a + pipeline of one or more HTTP request or response messages (not + intermixed). The following is to be registered with IANA [17]. + + Media Type name: message + Media subtype name: http + Required parameters: none + Optional parameters: version, msgtype + version: The HTTP-Version number of the enclosed message + (e.g., "1.1"). If not present, the version can be + determined from the first line of the body. + msgtype: The message type -- "request" or "response". If not + present, the type can be determined from the first + line of the body. + Encoding considerations: only "7bit", "8bit", or "binary" are + permitted + Security considerations: none + + Media Type name: application + Media subtype name: http + Required parameters: none + Optional parameters: version, msgtype + version: The HTTP-Version number of the enclosed messages + (e.g., "1.1"). If not present, the version can be + determined from the first line of the body. + msgtype: The message type -- "request" or "response". 
If not + present, the type can be determined from the first + line of the body. + Encoding considerations: HTTP messages enclosed by this type + are in "binary" format; use of an appropriate + Content-Transfer-Encoding is required when + transmitted via E-mail. + Security considerations: none + + + + + + + + + +Fielding, et al. Standards Track [Page 164] + +RFC 2616 HTTP/1.1 June 1999 + + +19.2 Internet Media Type multipart/byteranges + + When an HTTP 206 (Partial Content) response message includes the + content of multiple ranges (a response to a request for multiple + non-overlapping ranges), these are transmitted as a multipart + message-body. The media type for this purpose is called + "multipart/byteranges". + + The multipart/byteranges media type includes two or more parts, each + with its own Content-Type and Content-Range fields. The required + boundary parameter specifies the boundary string used to separate + each body-part. + + Media Type name: multipart + Media subtype name: byteranges + Required parameters: boundary + Optional parameters: none + Encoding considerations: only "7bit", "8bit", or "binary" are + permitted + Security considerations: none + + + For example: + + HTTP/1.1 206 Partial Content + Date: Wed, 15 Nov 1995 06:25:24 GMT + Last-Modified: Wed, 15 Nov 1995 04:58:08 GMT + Content-type: multipart/byteranges; boundary=THIS_STRING_SEPARATES + + --THIS_STRING_SEPARATES + Content-type: application/pdf + Content-range: bytes 500-999/8000 + + ...the first range... + --THIS_STRING_SEPARATES + Content-type: application/pdf + Content-range: bytes 7000-7999/8000 + + ...the second range + --THIS_STRING_SEPARATES-- + + Notes: + + 1) Additional CRLFs may precede the first boundary string in the + entity. + + + + + + +Fielding, et al. Standards Track [Page 165] + +RFC 2616 HTTP/1.1 June 1999 + + + 2) Although RFC 2046 [40] permits the boundary string to be + quoted, some existing implementations handle a quoted boundary + string incorrectly. 
+ + 3) A number of browsers and servers were coded to an early draft + of the byteranges specification to use a media type of + multipart/x-byteranges, which is almost, but not quite + compatible with the version documented in HTTP/1.1. + +19.3 Tolerant Applications + + Although this document specifies the requirements for the generation + of HTTP/1.1 messages, not all applications will be correct in their + implementation. We therefore recommend that operational applications + be tolerant of deviations whenever those deviations can be + interpreted unambiguously. + + Clients SHOULD be tolerant in parsing the Status-Line and servers + tolerant when parsing the Request-Line. In particular, they SHOULD + accept any amount of SP or HT characters between fields, even though + only a single SP is required. + + The line terminator for message-header fields is the sequence CRLF. + However, we recommend that applications, when parsing such headers, + recognize a single LF as a line terminator and ignore the leading CR. + + The character set of an entity-body SHOULD be labeled as the lowest + common denominator of the character codes used within that body, with + the exception that not labeling the entity is preferred over labeling + the entity with the labels US-ASCII or ISO-8859-1. See section 3.7.1 + and 3.4.1. + + Additional rules for requirements on parsing and encoding of dates + and other potential problems with date encodings include: + + - HTTP/1.1 clients and caches SHOULD assume that an RFC-850 date + which appears to be more than 50 years in the future is in fact + in the past (this helps solve the "year 2000" problem). + + - An HTTP/1.1 implementation MAY internally represent a parsed + Expires date as earlier than the proper value, but MUST NOT + internally represent a parsed Expires date as later than the + proper value. + + - All expiration-related calculations MUST be done in GMT. 
The + local time zone MUST NOT influence the calculation or comparison + of an age or expiration time. + + + + +Fielding, et al. Standards Track [Page 166] + +RFC 2616 HTTP/1.1 June 1999 + + + - If an HTTP header incorrectly carries a date value with a time + zone other than GMT, it MUST be converted into GMT using the + most conservative possible conversion. + +19.4 Differences Between HTTP Entities and RFC 2045 Entities + + HTTP/1.1 uses many of the constructs defined for Internet Mail (RFC + 822 [9]) and the Multipurpose Internet Mail Extensions (MIME [7]) to + allow entities to be transmitted in an open variety of + representations and with extensible mechanisms. However, RFC 2045 + discusses mail, and HTTP has a few features that are different from + those described in RFC 2045. These differences were carefully chosen + to optimize performance over binary connections, to allow greater + freedom in the use of new media types, to make date comparisons + easier, and to acknowledge the practice of some early HTTP servers + and clients. + + This appendix describes specific areas where HTTP differs from RFC + 2045. Proxies and gateways to strict MIME environments SHOULD be + aware of these differences and provide the appropriate conversions + where necessary. Proxies and gateways from MIME environments to HTTP + also need to be aware of the differences because some conversions + might be required. + +19.4.1 MIME-Version + + HTTP is not a MIME-compliant protocol. However, HTTP/1.1 messages MAY + include a single MIME-Version general-header field to indicate what + version of the MIME protocol was used to construct the message. Use + of the MIME-Version header field indicates that the message is in + full compliance with the MIME protocol (as defined in RFC 2045[7]). + Proxies/gateways are responsible for ensuring full compliance (where + possible) when exporting HTTP messages to strict MIME environments. + + MIME-Version = "MIME-Version" ":" 1*DIGIT "." 
1*DIGIT + + MIME version "1.0" is the default for use in HTTP/1.1. However, + HTTP/1.1 message parsing and semantics are defined by this document + and not the MIME specification. + +19.4.2 Conversion to Canonical Form + + RFC 2045 [7] requires that an Internet mail entity be converted to + canonical form prior to being transferred, as described in section 4 + of RFC 2049 [48]. Section 3.7.1 of this document describes the forms + allowed for subtypes of the "text" media type when transmitted over + HTTP. RFC 2046 requires that content with a type of "text" represent + line breaks as CRLF and forbids the use of CR or LF outside of line + + + +Fielding, et al. Standards Track [Page 167] + +RFC 2616 HTTP/1.1 June 1999 + + + break sequences. HTTP allows CRLF, bare CR, and bare LF to indicate a + line break within text content when a message is transmitted over + HTTP. + + Where it is possible, a proxy or gateway from HTTP to a strict MIME + environment SHOULD translate all line breaks within the text media + types described in section 3.7.1 of this document to the RFC 2049 + canonical form of CRLF. Note, however, that this might be complicated + by the presence of a Content-Encoding and by the fact that HTTP + allows the use of some character sets which do not use octets 13 and + 10 to represent CR and LF, as is the case for some multi-byte + character sets. + + Implementors should note that conversion will break any cryptographic + checksums applied to the original content unless the original content + is already in canonical form. Therefore, the canonical form is + recommended for any content that uses such checksums in HTTP. + +19.4.3 Conversion of Date Formats + + HTTP/1.1 uses a restricted set of date formats (section 3.3.1) to + simplify the process of date comparison. Proxies and gateways from + other protocols SHOULD ensure that any Date header field present in a + message conforms to one of the HTTP/1.1 formats and rewrite the date + if necessary. 
+ +19.4.4 Introduction of Content-Encoding + + RFC 2045 does not include any concept equivalent to HTTP/1.1's + Content-Encoding header field. Since this acts as a modifier on the + media type, proxies and gateways from HTTP to MIME-compliant + protocols MUST either change the value of the Content-Type header + field or decode the entity-body before forwarding the message. (Some + experimental applications of Content-Type for Internet mail have used + a media-type parameter of ";conversions=<content-coding>" to perform + a function equivalent to Content-Encoding. However, this parameter is + not part of RFC 2045.) + +19.4.5 No Content-Transfer-Encoding + + HTTP does not use the Content-Transfer-Encoding (CTE) field of RFC + 2045. Proxies and gateways from MIME-compliant protocols to HTTP MUST + remove any non-identity CTE ("quoted-printable" or "base64") encoding + prior to delivering the response message to an HTTP client. + + Proxies and gateways from HTTP to MIME-compliant protocols are + responsible for ensuring that the message is in the correct format + and encoding for safe transport on that protocol, where "safe + + + +Fielding, et al. Standards Track [Page 168] + +RFC 2616 HTTP/1.1 June 1999 + + + transport" is defined by the limitations of the protocol being used. + Such a proxy or gateway SHOULD label the data with an appropriate + Content-Transfer-Encoding if doing so will improve the likelihood of + safe transport over the destination protocol. + +19.4.6 Introduction of Transfer-Encoding + + HTTP/1.1 introduces the Transfer-Encoding header field (section + 14.41). Proxies/gateways MUST remove any transfer-coding prior to + forwarding a message via a MIME-compliant protocol.
+ + A process for decoding the "chunked" transfer-coding (section 3.6) + can be represented in pseudo-code as: + + length := 0 + read chunk-size, chunk-extension (if any) and CRLF + while (chunk-size > 0) { + read chunk-data and CRLF + append chunk-data to entity-body + length := length + chunk-size + read chunk-size and CRLF + } + read entity-header + while (entity-header not empty) { + append entity-header to existing header fields + read entity-header + } + Content-Length := length + Remove "chunked" from Transfer-Encoding + +19.4.7 MHTML and Line Length Limitations + + HTTP implementations which share code with MHTML [45] implementations + need to be aware of MIME line length limitations. Since HTTP does not + have this limitation, HTTP does not fold long lines. MHTML messages + being transported by HTTP follow all conventions of MHTML, including + line length limitations and folding, canonicalization, etc., since + HTTP transports all message-bodies as payload (see section 3.7.2) and + does not interpret the content or any MIME header lines that might be + contained therein. + +19.5 Additional Features + + RFC 1945 and RFC 2068 document protocol elements used by some + existing HTTP implementations, but not consistently and correctly + across most HTTP/1.1 applications. Implementors are advised to be + aware of these features, but cannot rely upon their presence in, or + interoperability with, other HTTP/1.1 applications. Some of these + + + +Fielding, et al. Standards Track [Page 169] + +RFC 2616 HTTP/1.1 June 1999 + + + describe proposed experimental features, and some describe features + that experimental deployment found lacking that are now addressed in + the base HTTP/1.1 specification. + + A number of other headers, such as Content-Disposition and Title, + from SMTP and MIME are also often implemented (see RFC 2076 [37]). 
+ +19.5.1 Content-Disposition + + The Content-Disposition response-header field has been proposed as a + means for the origin server to suggest a default filename if the user + requests that the content is saved to a file. This usage is derived + from the definition of Content-Disposition in RFC 1806 [35]. + + content-disposition = "Content-Disposition" ":" + disposition-type *( ";" disposition-parm ) + disposition-type = "attachment" | disp-extension-token + disposition-parm = filename-parm | disp-extension-parm + filename-parm = "filename" "=" quoted-string + disp-extension-token = token + disp-extension-parm = token "=" ( token | quoted-string ) + + An example is + + Content-Disposition: attachment; filename="fname.ext" + + The receiving user agent SHOULD NOT respect any directory path + information present in the filename-parm parameter, which is the only + parameter believed to apply to HTTP implementations at this time. The + filename SHOULD be treated as a terminal component only. + + If this header is used in a response with the application/octet- + stream content-type, the implied suggestion is that the user agent + should not display the response, but directly enter a `save response + as...' dialog. + + See section 15.5 for Content-Disposition security issues. + +19.6 Compatibility with Previous Versions + + It is beyond the scope of a protocol specification to mandate + compliance with previous versions. HTTP/1.1 was deliberately + designed, however, to make supporting previous versions easy. It is + worth noting that, at the time of composing this specification + (1996), we would expect commercial HTTP/1.1 servers to: + + - recognize the format of the Request-Line for HTTP/0.9, 1.0, and + 1.1 requests; + + + +Fielding, et al. Standards Track [Page 170] + +RFC 2616 HTTP/1.1 June 1999 + + + - understand any valid request in the format of HTTP/0.9, 1.0, or + 1.1; + + - respond appropriately with a message in the same major version + used by the client. 
+ + And we would expect HTTP/1.1 clients to: + + - recognize the format of the Status-Line for HTTP/1.0 and 1.1 + responses; + + - understand any valid response in the format of HTTP/0.9, 1.0, or + 1.1. + + For most implementations of HTTP/1.0, each connection is established + by the client prior to the request and closed by the server after + sending the response. Some implementations implement the Keep-Alive + version of persistent connections described in section 19.7.1 of RFC + 2068 [33]. + +19.6.1 Changes from HTTP/1.0 + + This section summarizes major differences between versions HTTP/1.0 + and HTTP/1.1. + +19.6.1.1 Changes to Simplify Multi-homed Web Servers and Conserve IP + Addresses + + The requirements that clients and servers support the Host request- + header, report an error if the Host request-header (section 14.23) is + missing from an HTTP/1.1 request, and accept absolute URIs (section + 5.1.2) are among the most important changes defined by this + specification. + + Older HTTP/1.0 clients assumed a one-to-one relationship of IP + addresses and servers; there was no other established mechanism for + distinguishing the intended server of a request than the IP address + to which that request was directed. The changes outlined above will + allow the Internet, once older HTTP clients are no longer common, to + support multiple Web sites from a single IP address, greatly + simplifying large operational Web servers, where allocation of many + IP addresses to a single host has created serious problems. The + Internet will also be able to recover the IP addresses that have been + allocated for the sole purpose of allowing special-purpose domain + names to be used in root-level HTTP URLs. Given the rate of growth of + the Web, and the number of servers already deployed, it is extremely + + + + + +Fielding, et al. 
Standards Track [Page 171] + +RFC 2616 HTTP/1.1 June 1999 + + + important that all implementations of HTTP (including updates to + existing HTTP/1.0 applications) correctly implement these + requirements: + + - Both clients and servers MUST support the Host request-header. + + - A client that sends an HTTP/1.1 request MUST send a Host header. + + - Servers MUST report a 400 (Bad Request) error if an HTTP/1.1 + request does not include a Host request-header. + + - Servers MUST accept absolute URIs. + +19.6.2 Compatibility with HTTP/1.0 Persistent Connections + + Some clients and servers might wish to be compatible with some + previous implementations of persistent connections in HTTP/1.0 + clients and servers. Persistent connections in HTTP/1.0 are + explicitly negotiated as they are not the default behavior. HTTP/1.0 + experimental implementations of persistent connections are faulty, + and the new facilities in HTTP/1.1 are designed to rectify these + problems. The problem was that some existing 1.0 clients may be + sending Keep-Alive to a proxy server that doesn't understand + Connection, which would then erroneously forward it to the next + inbound server, which would establish the Keep-Alive connection and + result in a hung HTTP/1.0 proxy waiting for the close on the + response. The result is that HTTP/1.0 clients must be prevented from + using Keep-Alive when talking to proxies. + + However, talking to proxies is the most important use of persistent + connections, so that prohibition is clearly unacceptable. Therefore, + we need some other mechanism for indicating a persistent connection + is desired, which is safe to use even when talking to an old proxy + that ignores Connection. Persistent connections are the default for + HTTP/1.1 messages; we introduce a new keyword (Connection: close) for + declaring non-persistence. See section 14.10. 
+ + The original HTTP/1.0 form of persistent connections (the Connection: + Keep-Alive and Keep-Alive header) is documented in RFC 2068. [33] + +19.6.3 Changes from RFC 2068 + + This specification has been carefully audited to correct and + disambiguate key word usage; RFC 2068 had many problems in respect to + the conventions laid out in RFC 2119 [34]. + + Clarified which error code should be used for inbound server failures + (e.g. DNS failures). (Section 10.5.5). + + + +Fielding, et al. Standards Track [Page 172] + +RFC 2616 HTTP/1.1 June 1999 + + + CREATE had a race that required an Etag be sent when a resource is + first created. (Section 10.2.2). + + Content-Base was deleted from the specification: it was not + implemented widely, and there is no simple, safe way to introduce it + without a robust extension mechanism. In addition, it is used in a + similar, but not identical fashion in MHTML [45]. + + Transfer-coding and message lengths all interact in ways that + required fixing exactly when chunked encoding is used (to allow for + transfer encoding that may not be self delimiting); it was important + to straighten out exactly how message lengths are computed. (Sections + 3.6, 4.4, 7.2.2, 13.5.2, 14.13, 14.16) + + A content-coding of "identity" was introduced, to solve problems + discovered in caching. (section 3.5) + + Quality Values of zero should indicate that "I don't want something" + to allow clients to refuse a representation. (Section 3.9) + + The use and interpretation of HTTP version numbers has been clarified + by RFC 2145. Require proxies to upgrade requests to highest protocol + version they support to deal with problems discovered in HTTP/1.0 + implementations (Section 3.1) + + Charset wildcarding is introduced to avoid explosion of character set + names in accept headers. (Section 14.2) + + A case was missed in the Cache-Control model of HTTP/1.1; s-maxage + was introduced to add this missing case. 
(Sections 13.4, 14.8, 14.9, + 14.9.3) + + The Cache-Control: max-age directive was not properly defined for + responses. (Section 14.9.3) + + There are situations where a server (especially a proxy) does not + know the full length of a response but is capable of serving a + byterange request. We therefore need a mechanism to allow byteranges + with a content-range not indicating the full length of the message. + (Section 14.16) + + Range request responses would become very verbose if all meta-data + were always returned; by allowing the server to only send needed + headers in a 206 response, this problem can be avoided. (Section + 10.2.7, 13.5.3, and 14.27) + + + + + + +Fielding, et al. Standards Track [Page 173] + +RFC 2616 HTTP/1.1 June 1999 + + + Fix problem with unsatisfiable range requests; there are two cases: + syntactic problems, and range doesn't exist in the document. The 416 + status code was needed to resolve this ambiguity needed to indicate + an error for a byte range request that falls outside of the actual + contents of a document. (Section 10.4.17, 14.16) + + Rewrite of message transmission requirements to make it much harder + for implementors to get it wrong, as the consequences of errors here + can have significant impact on the Internet, and to deal with the + following problems: + + 1. Changing "HTTP/1.1 or later" to "HTTP/1.1", in contexts where + this was incorrectly placing a requirement on the behavior of + an implementation of a future version of HTTP/1.x + + 2. Made it clear that user-agents should retry requests, not + "clients" in general. + + 3. Converted requirements for clients to ignore unexpected 100 + (Continue) responses, and for proxies to forward 100 responses, + into a general requirement for 1xx responses. + + 4. Modified some TCP-specific language, to make it clearer that + non-TCP transports are possible for HTTP. + + 5. 
Require that the origin server MUST NOT wait for the request + body before it sends a required 100 (Continue) response. + + 6. Allow, rather than require, a server to omit 100 (Continue) if + it has already seen some of the request body. + + 7. Allow servers to defend against denial-of-service attacks and + broken clients. + + This change adds the Expect header and 417 status code. The message + transmission requirements fixes are in sections 8.2, 10.4.18, + 8.1.2.2, 13.11, and 14.20. + + Proxies should be able to add Content-Length when appropriate. + (Section 13.5.2) + + Clean up confusion between 403 and 404 responses. (Section 10.4.4, + 10.4.5, and 10.4.11) + + Warnings could be cached incorrectly, or not updated appropriately. + (Section 13.1.2, 13.2.4, 13.5.2, 13.5.3, 14.9.3, and 14.46) Warning + also needed to be a general header, as PUT or other methods may have + need for it in requests. + + + +Fielding, et al. Standards Track [Page 174] + +RFC 2616 HTTP/1.1 June 1999 + + + Transfer-coding had significant problems, particularly with + interactions with chunked encoding. The solution is that transfer- + codings become as full fledged as content-codings. This involves + adding an IANA registry for transfer-codings (separate from content + codings), a new header field (TE) and enabling trailer headers in the + future. Transfer encoding is a major performance benefit, so it was + worth fixing [39]. TE also solves another, obscure, downward + interoperability problem that could have occurred due to interactions + between authentication trailers, chunked encoding and HTTP/1.0 + clients.(Section 3.6, 3.6.1, and 14.39) + + The PATCH, LINK, UNLINK methods were defined but not commonly + implemented in previous versions of this specification. See RFC 2068 + [33]. + + The Alternates, Content-Version, Derived-From, Link, URI, Public and + Content-Base header fields were defined in previous versions of this + specification, but not commonly implemented. 
See RFC 2068 [33]. + +20 Index + + Please see the PostScript version of this RFC for the INDEX. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Fielding, et al. Standards Track [Page 175] + +RFC 2616 HTTP/1.1 June 1999 + + +21. Full Copyright Statement + + Copyright (C) The Internet Society (1999). All Rights Reserved. + + This document and translations of it may be copied and furnished to + others, and derivative works that comment on or otherwise explain it + or assist in its implementation may be prepared, copied, published + and distributed, in whole or in part, without restriction of any + kind, provided that the above copyright notice and this paragraph are + included on all such copies and derivative works. However, this + document itself may not be modified in any way, such as by removing + the copyright notice or references to the Internet Society or other + Internet organizations, except as needed for the purpose of + developing Internet standards in which case the procedures for + copyrights defined in the Internet Standards process must be + followed, or as required to translate it into languages other than + English. + + The limited permissions granted above are perpetual and will not be + revoked by the Internet Society or its successors or assigns. + + This document and the information contained herein is provided on an + "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING + TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING + BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION + HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. + +Acknowledgement + + Funding for the RFC Editor function is currently provided by the + Internet Society. + + + + + + + + + + + + + + + + + + + +Fielding, et al. 
Standards Track [Page 176] + diff --git a/src/arraylist.cpp b/src/arraylist.cpp new file mode 100644 index 0000000..ef21426 --- /dev/null +++ b/src/arraylist.cpp @@ -0,0 +1,100 @@ +#include "arraylist.h" +#include <stdlib.h> +#include <string.h> + +/** + * Build an empty list with room for initSize pointers. A negative initSize + * is treated as 0 and a growByFactor below 1 as 1, so the array can always + * grow when it fills up. + */ +ArrayList::ArrayList( int initSize, int growByFactor ) +{ + if( initSize < 0 ) + initSize = 0; + if( growByFactor < 1 ) + growByFactor = 1; + + apData = new void *[initSize]; + nSize = 0; + nCapacity = initSize; + nGrowByFactor = growByFactor; +} + +/** Free the pointer array itself; the stored pointers are not deleted. */ +ArrayList::~ArrayList( ) +{ + delete[] apData; +} + +/** + * Look up the pointer stored at index. + *@returns The stored pointer, or NULL if index is not in 0..nSize-1. + */ +void *ArrayList::getAt( int index ) +{ + /* index == nSize is one past the last element and must be rejected too; + * the previous '>' test let callers read that unused slot. */ + if( index < 0 || index >= nSize ) + return NULL; + + return apData[index]; +} + +/** Add data after the current last element. */ +void ArrayList::append( void *data ) +{ + insertBefore( data, nSize ); +} + +/** + * Insert data in front of the element at pos, shifting later elements up. + * pos == nSize appends; any other out-of-range pos is ignored. + */ +void ArrayList::insertBefore( void *data, int pos ) +{ + if( pos < 0 || pos > nSize ) + return; + + checkResize(); + memmove( &apData[pos+1], &apData[pos], (nSize-pos)*sizeof(void*) ); + apData[pos] = data; + nSize++; +} + +/** @returns The number of elements currently stored. */ +int ArrayList::getSize( ) +{ + return nSize; +} + +/** @returns true when the list holds no elements. */ +bool ArrayList::isEmpty( ) +{ + return nSize==0; +} + +/** + * Remove the element at index and close the gap. Out-of-range indexes are + * ignored; the removed pointer is not deleted. + */ +void ArrayList::deleteAt( int index ) +{ + if( index < 0 || index >= nSize ) + return; + + memmove( &apData[index], &apData[index+1], (nSize-index-1)*sizeof(void *) ); + nSize--; +} + +void ArrayList::empty() +{ + // Probably the easiest as far as things go.
+ nSize = 0; +} + +/** + * Move the elements into a freshly allocated array of newSize slots. When + * newSize is smaller than nSize the elements at the end are cut off. + */ +void ArrayList::resizeTo( int newSize ) +{ + if( newSize < 0 ) + newSize = 0; + /* Never copy more than the new array can hold. */ + if( nSize > newSize ) + nSize = newSize; + + void **apNew = new void *[newSize]; + memmove( apNew, apData, nSize*sizeof(void *) ); + nCapacity = newSize; + delete[] apData; + apData = apNew; +} + +/** Grow the array by nGrowByFactor (at least 1) slots once it is full. */ +void ArrayList::checkResize() +{ + if( nSize >= nCapacity ) + { + resizeTo( nCapacity + ( nGrowByFactor > 0 ? nGrowByFactor : 1 ) ); + } +} + +/** + * Set the number of elements. Growing adds NULL elements at the end, + * shrinking drops elements from the end. Negative sizes are ignored. + */ +void ArrayList::setSize( int newSize ) +{ + if( newSize < 0 ) + return; + + /* Make room before nSize changes: resizeTo() copies nSize elements out of + * the old array, and a single checkResize() step may not be big enough. */ + if( newSize >= nCapacity ) + resizeTo( newSize + ( nGrowByFactor > 0 ? nGrowByFactor : 1 ) ); + + /* Newly exposed slots were never written, so give them a defined value. */ + for( int j = nSize; j < newSize; j++ ) + apData[j] = NULL; + + nSize = newSize; +} + +/** Overwrite the pointer at index; out-of-range indexes are ignored. */ +void ArrayList::setAt( int index, void *data ) +{ + if( index < 0 || index >= nSize ) + return; + + apData[index] = data; +} + diff --git a/src/arraylist.h b/src/arraylist.h new file mode 100644 index 0000000..74992cf --- /dev/null +++ b/src/arraylist.h @@ -0,0 +1,80 @@ +/** \file arraylist.h + * Describes the ArrayList class. + *@author Mike Buland + */ +#ifndef ARRAY_LIST_H +#define ARRAY_LIST_H + +#include "list.h" + +/** A simple list which uses an array. This is a great choice if you won't do + * a lot of adding and deleting and need a fast random access list. Otherwise + * use the LinkedList. + *@author Mike Buland + */ +class ArrayList : public List +{ +public: + /** Creates an arraylist with some pre-defined specs spelled out. + *@param initSize the initial number of elements to allocate. + *@param growByFactor How much to increase the size of the array by + * each time we run out of room. + */ + ArrayList( int initSize=100, int growByFactor=10 ); + /** + * Destroy the ArrayList + */ + ~ArrayList(); + + void *getAt( int nIndex ); + void append( void *pData ); + void insertBefore( void *pData, int nPos = 0 ); + int getSize( ); + bool isEmpty( ); + void deleteAt( int nIndex ); + void empty(); + void setSize( int nNewSize ); + void setAt( int nIndex, void *pData ); + +private: + /** + * Checks to see if the system needs to be resized, if it does, this will + * automatically resize based on your parameters. + */ + void checkResize(); + + /** + * Resize the system to a specified size.
If it is larger, then all data + * will be retained, if smaller the elements at the end will be cut off. + *@param newSize The number of elements to include after resizing. + */ + void resizeTo( int newSize ); + + /** + * Actual master array of pointers. This is done to follow the List specs. + * All data transactions are performed with pointers or compatible + * primitive data-types. + */ + void **apData; + + /** + * The number of filled in elements in the array. This is the practical + * real size of the ArrayList for all userspace applications. + */ + int nSize; + + /** + * The number of elements allocated in memory. Not all of these have to be + * filled in, and it is usually larger than nSize so that adding and + * deleting elements is fast and easy. + */ + int nCapacity; + + /** + * The amount to grow by whenever the array needs resizing. + */ + int nGrowByFactor; +}; + +#endif + diff --git a/src/cgi.cpp b/src/cgi.cpp new file mode 100644 index 0000000..1fecbbe --- /dev/null +++ b/src/cgi.cpp @@ -0,0 +1,644 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <sys/stat.h> + +#include "cgi.h" + +/** + * Collect the request variables from standard input (when CONTENT_LENGTH is + * set), from HTTP_COOKIE and from QUERY_STRING into aVars, after loading the + * content file named by strSource, if any. + */ +Cgi::Cgi( const char *strSource ) : + aContent( new HashFunctionString(), 151, true ) +{ + int length, j, k, mode = 0, slen = 0; + char hexbuf[3] = { 0, 0, 0 }; + /* buf starts out NULL so that the delete following the form-data branches + * is harmless when neither branch allocated it. */ + char *buf = NULL, chr; + Item *cur = NULL; + int nCur = 0; + + if( strSource != NULL ) + { + loadContent( strSource ); + } + + /* Both variables are used below; a request that carried CONTENT_LENGTH + * but no CONTENT_TYPE used to hand NULL to strcmp(). */ + if( getenv( "CONTENT_LENGTH" ) != NULL && getenv( "CONTENT_TYPE" ) != NULL ) + { + if( !strcmp + ( getenv( "CONTENT_TYPE" ), + "application/x-www-form-urlencoded" ) ) + { + length = atoi( getenv( "CONTENT_LENGTH" ) ); + /* A negative length must not reach new[] or fread(). */ + if( length < 0 ) + length = 0; + buf = new char[length + 1]; + fread( buf, 1, length, stdin ); + cur = new Item( ); + aVars.append( cur ); + cur->type = VAR_STDINPUT; + /* First pass: only measure each name and value so they can be + * allocated; the text is copied by the second pass. + * NOTE(review): name is allocated only when '=' is seen, and again + * on every further '=' in the same pair, while the second pass + * writes through it unconditionally -- confirm the behaviour for + * pairs with no '=' or with several. */ + for( j = 0; j < length; j++ ) + { + switch ( buf[j] ) + { + case '=': + cur->name = new char[slen + 1]; + slen = 0; + break; + + case '&': + cur->value = new char[slen + 1]; + cur->len = slen; + slen = 0; + cur = new Item( ); + aVars.append( cur ); + cur->type = VAR_STDINPUT; + break; + +
default: + switch ( buf[j] ) + { + case '%': /* per-cents mean a hex-code for an ASCII char */ + j += 2; + slen++; + break; + + default: /* Nothing special, move along, folks... */ + slen++; + break; + } + break; + } + } + cur->value = new char[slen + 1]; + cur->len = slen; + slen = 0; + mode = 0; + cur = ( Item * ) aVars.getAt( 0 ); + k = 0; + nCur = 0; + /* Second pass: decode the text into the buffers sized above; mode 0 + * writes the name, mode 1 the value. */ + for( j = 0; j < length; j++ ) + { + switch ( buf[j] ) + { + case '=': + mode = 1; + k = 0; + break; + + case '&': + mode = 0; + k = 0; + nCur++; + cur = ( Item * ) aVars.getAt( nCur ); + break; + + default: + switch ( buf[j] ) + { + case '%': /* per-cents mean a hex-code for an ASCII char */ + hexbuf[0] = buf[++j]; + hexbuf[1] = buf[++j]; + chr = ( char ) ( strtol( hexbuf, NULL, 16 ) ); + break; + + case '+': /* Pluses mean spaces, odd, I know... */ + chr = ' '; + break; + + default: /* Nothing special, move along, folks... */ + chr = buf[j]; + break; + } + if( mode == 0 ) + { + cur->name[k] = chr; + cur->name[++k] = '\0'; + } + else + { + cur->value[k] = chr; + cur->value[++k] = '\0'; + } + break; + } + } + /* buf came from new[], and it is deleted once more after the + * multipart branch below, so clear the pointer here. */ + delete[] buf; + buf = NULL; + } + else if( !strncmp + ( getenv( "CONTENT_TYPE" ), "multipart/form-data;", 20 ) ) + { + char *boundary, *oname; + int blen, j, k, olen; + + length = atoi( getenv( "CONTENT_LENGTH" ) ); + /* A negative length must not reach new[] or fread(). */ + if( length < 0 ) + length = 0; + buf = new char[length + 1]; + fread( buf, 1, length, stdin ); + /* Terminate the data so strncmp() on its tail stops at the end. */ + buf[length] = '\0'; + /* The first line is the boundary; give up at the end of the data if + * it holds no newline at all. */ + for( blen = 0; blen + 1 < length && buf[blen + 1] != '\n'; blen++ ); + boundary = new char[blen + 1]; + memcpy( boundary, buf, blen ); + boundary[blen] = '\0'; + j = blen + 2; + for( ;; ) + { + cur = new Item( ); + aVars.append( cur ); + cur->type = VAR_STDINPUT; + if( !strncmp + ( buf + j, "Content-Disposition: form-data; name=\"", + 38 ) ) + { + j += 38; + for( k = 0; buf[j + k] != '\"'; k++ ); + oname = cur->name = new char[k + 1]; + memcpy( cur->name, buf + j, k ); + olen = k; + cur->name[k] = '\0'; + j += k + 1; + if( !strncmp( buf + j, "; filename=\"", 12 ) ) /* Must be a file */ + { + /* Acquire file name */ + j += 12; + for( k = 0; buf[j + k] != '\"'; k++ ); +
cur->value = new char[k + 1]; + memcpy( cur->value, buf + j, k ); + cur->value[k] = '\0'; + cur->len = k; + j += k + 3; + + /* Acquire content type */ + if( !strncmp( "Content-Type: ", buf + j, 14 ) ) + { + j += 14; + cur = new Item( ); + aVars.append( cur ); + cur->type = VAR_STDINPUT; + cur->name = new char[olen + 1]; + memcpy( cur->name, oname, olen + 1 ); + for( k = 0; buf[j + k + 1] != '\n'; k++ ); + cur->value = new char[k + 1]; + memcpy( cur->value, buf + j, k ); + cur->value[k] = '\0'; + cur->len = k; + j += k; + } + else + { + cur = new Item( ); + aVars.append( cur ); + cur->type = VAR_STDINPUT; + cur->name = new char[olen + 1]; + memcpy( cur->name, oname, olen + 1 ); + cur->value = new char[1]; + cur->value[0] = '\0'; + cur->len = 0; + } + j += 4; + + /* Acquire content */ + cur = new Item( ); + aVars.append( cur ); + cur->type = VAR_STDINPUT; + cur->name = new char[olen + 1]; + memcpy( cur->name, oname, olen + 1 ); + if( !strncmp( buf + j + k, boundary, blen ) ) + { + cur->value = new char[1]; + cur->value[0] = '\0'; + j += blen + 4; + } + else if( !strncmp( buf + j + k + 1, boundary, blen ) ) + { + cur->value = new char[1]; + cur->value[0] = '\0'; + j += blen + 5; + } + else + { + for( k = 0; + strncmp( buf + j + k + 2, boundary, blen ); + k++ ); + cur->value = new char[k + 1]; + memcpy( cur->value, buf + j, k ); + cur->value[k] = '\0'; + cur->len = k; + j += k + blen + 4; + } + } + else + { + j += 4; + for( k = 0; + strncmp( buf + j + k + 2, boundary, blen ); + k++ ); + cur->value = new char[k + 1]; + memcpy( cur->value, buf + j, k ); + cur->value[k] = '\0'; + cur->len = k; + j += k + blen + 4; + } + if( buf[j + 1] == '\n' ) + j += 2; + if( j >= length ) + break; + } + else + { + cur->name = ( char * ) "ERROR"; + cur->value = ( char * ) "Error here"; + /* Nothing advanced j, so another round would take this branch + * again and append error items forever. */ + break; + } + } + /* The boundary copy is only needed while parsing. */ + delete[] boundary; + } + /* buf was allocated with new[]. */ + delete[] buf; + } + + if( ( buf = getenv( "HTTP_COOKIE" ) ) ) + { + int lbase = aVars.getSize( ); + length = strlen( buf ); + cur = new Item( ); + aVars.append( cur ); + cur->type = VAR_COOKIE;
+ for( j = 0; j < length; j++ ) + { + switch ( buf[j] ) + { + case '=': + cur->name = new char[slen + 1]; + slen = 0; + break; + + case ';': + cur->value = new char[slen + 1]; + cur->len = slen; + slen = 0; + cur = new Item( ); + aVars.append( cur ); + cur->type = VAR_COOKIE; + break; + + default: + switch ( buf[j] ) + { + case '%': /* A percent sign starts a two-character hex escape: skip both characters and count one decoded char */ + j += 2; + slen++; + break; + + default: /* Any other character counts as one. NOTE(review): name is only allocated in the '=' case above, so a cookie without '=' leaves cur->name NULL for the copying pass below -- confirm */ + slen++; + break; + } + break; + } + } + cur->value = new char[slen + 1]; + cur->len = slen; + slen = 0; + cur = ( Item * ) aVars.getAt( lbase ); + mode = 0; + k = 0; + nCur = lbase; + for( j = 0; j < length; j++ ) + { + switch ( buf[j] ) + { + case '=': + mode = 1; + k = 0; + break; + + case ';': + mode = 0; + k = 0; + nCur++; + cur = ( Item * ) aVars.getAt( nCur ); + break; + + default: + switch ( buf[j] ) + { + case '%': /* Turn the two characters after the percent sign into one char via strtol base 16 */ + hexbuf[0] = buf[++j]; + hexbuf[1] = buf[++j]; + chr = ( char ) ( strtol( hexbuf, NULL, 16 ) ); + break; + + case '+': /* A plus sign decodes to a space */ + chr = ' '; + break; + + case ' ': + continue; + break; + + default: /* Nothing special, move along, folks...
*/ + chr = buf[j]; + break; + } + if( mode == 0 ) + { + cur->name[k] = chr; + cur->name[++k] = '\0'; + } + else + { + cur->value[k] = chr; + cur->value[++k] = '\0'; + } + break; + } + } + } + + if( ( buf = getenv( "QUERY_STRING" ) ) ) + { + if( strlen( buf ) > 0 ) + { + int lbase = aVars.getSize( ); + length = strlen( buf ); + cur = new Item( ); + aVars.append( cur ); + cur->type = VAR_CMDLINE; + for( j = 0; j < length; j++ ) + { + switch ( buf[j] ) + { + case '=': + cur->name = new char[slen + 1]; + slen = 0; + break; + + case '&': + cur->value = new char[slen + 1]; + cur->len = slen; + slen = 0; + cur = new Item( ); + aVars.append( cur ); + cur->type = VAR_CMDLINE; + break; + + default: + switch ( buf[j] ) + { + case '%': /* A percent sign starts a two-character hex escape: skip both characters and count one decoded char */ + j += 2; + slen++; + break; + + default: /* Any other character counts as one. NOTE(review): name is only allocated in the '=' case above, so a parameter without '=' leaves cur->name NULL for the copying pass below -- confirm */ + slen++; + break; + } + break; + } + } + cur->value = new char[slen + 1]; + cur->len = slen; + slen = 0; + cur = ( Item * ) aVars.getAt( lbase ); + nCur = lbase; + mode = 0; + k = 0; + for( j = 0; j < length; j++ ) + { + switch ( buf[j] ) + { + case '=': + mode = 1; + k = 0; + break; + + case '&': + mode = 0; + k = 0; + nCur++; + cur = ( Item * ) aVars.getAt( nCur ); + break; + + default: + switch ( buf[j] ) + { + case '%': /* Turn the two characters after the percent sign into one char via strtol base 16 */ + hexbuf[0] = buf[++j]; + hexbuf[1] = buf[++j]; + chr = ( char ) ( strtol( hexbuf, NULL, 16 ) ); + break; + + case '+': /* A plus sign decodes to a space */ + chr = ' '; + break; + + default: /* Nothing special, move along, folks...
*/ + chr = buf[j]; + break; + } + if( mode == 0 ) + { + cur->name[k] = chr; + cur->name[++k] = '\0'; + } + else + { + cur->value[k] = chr; + cur->value[++k] = '\0'; + } + break; + } + } + } + } +} + +/* NOTE(review): the Items appended to aVars and their strings are not + * released here -- confirm whether LinkedList is expected to do that. */ +Cgi::~Cgi( ) +{ +} + +/** + * Find the value of the variable called name, skipping the first skip + * matches; only items whose type shares a bit with type are considered. + *@returns The value string, or NULL when no such variable exists. + */ +char *Cgi::getVarValue( const char *name, int skip, unsigned char type ) +{ + for( int j = 0; j < aVars.getSize( ); j++ ) + { + Item *cur = ( Item * ) aVars.getAt( j ); + /* An Item starts out with a NULL name, which strcmp() must not see. */ + if( cur->name != NULL && !strcmp( cur->name, name ) ) + { + if( ( cur->type & type ) ) + { + if( skip <= 0 ) + { + return cur->value; + } + else + { + skip--; + } + } + } + } + return NULL; +} + +/** + * Same search as getVarValue, but report the stored length of the value. + *@returns The item's len field, or -1 when no such variable exists. + */ +int Cgi::getVarLength( const char *name, int skip, unsigned char type ) +{ + for( int j = 0; j < aVars.getSize( ); j++ ) + { + Item *cur = ( Item * ) aVars.getAt( j ); + /* An Item starts out with a NULL name, which strcmp() must not see. */ + if( cur->name != NULL && !strcmp( cur->name, name ) ) + { + if( ( cur->type & type ) ) + { + if( skip <= 0 ) + { + return cur->len; + } + else + { + skip--; + } + } + } + } + return -1; +} + +/** + * Print every stored variable with its source flag, wrapped in a <pre> + * block so the line layout survives in html output. + */ +void Cgi::writeDebugInfo() +{ + printf( "<pre>\n" );
+    printf( "0x%02X - stdInput | 0x%02X - cookie | 0x%02X - cmdLine\n\n",
+             VAR_STDINPUT, VAR_COOKIE, VAR_CMDLINE );
+    for( int j = 0; j < aVars.getSize(  ); j++ )
+    {
+        Item *item = ( Item * ) aVars.getAt( j );
+        /* Either string may still be NULL, and NULL for %s is undefined. */
+        printf("[%s] = \"%s\" [0x%02X]\n", item->name ? item->name : "(null)",
+                 item->value ? item->value : "(null)", item->type );
+    }
+    printf( "</pre>\n" ); +} + +/** Print the header line for the given header enum value, then a blank line. */ +void Cgi::writeContentHeader( int type ) +{ + switch( type ) + { + case headerHTML: + printf("Content-type: text/html\n\n"); + break; + } +} + +/** + * Look up the content block called name and print it, using the block as + * the printf format for the remaining arguments. + */ +void Cgi::writeContent( const char *name, ...) +{ + char *templ = (char *)aContent.get(name); + + if( templ ) + { + va_list ap; + + va_start (ap, name); + vprintf (templ, ap); + va_end (ap); + } + else + { + printf("Error finding content labeled \"%s\"\n", name ); + } +} + +/** + * Read a content file into aContent. With a NULL strSource the file name is + * the program's short invocation name followed by ".content". A file that + * cannot be opened is silently ignored. + */ +void Cgi::loadContent( const char *strSource ) +{ + FILE *fh = NULL; + if( strSource == NULL ) + { + extern char *program_invocation_short_name; + char *tmpName = new char[strlen(program_invocation_short_name)+10]; + memset( tmpName, 0, strlen(program_invocation_short_name)+10 ); + strcpy( tmpName, program_invocation_short_name ); + strcat( tmpName, ".content" ); + fh = fopen( tmpName, "rt" ); + /* tmpName came from new[]. */ + delete[] tmpName; + } + else + { + fh = fopen( strSource, "rt" ); + } + + if( fh == NULL ) return; + + struct stat xStats; + + /* Without a valid size there is nothing sensible to read. */ + if( fstat( fileno( fh ), &xStats ) != 0 ) + { + fclose( fh ); + return; + } + + char *bigBuf = new char[xStats.st_size+1]; + memset( bigBuf, 0, xStats.st_size+1 ); + fread( bigBuf, 1, xStats.st_size, fh ); + fclose( fh ); + + // Now we can actually load stuff from the file, first we need to make us up a format...
+ int lSize=0; + struct Content + { + char *name; + char *value; + } xCont; + int j = 0; + while( j < xStats.st_size ) + { + // We're looking for a content-block init statement + for( ; j < xStats.st_size; j++ ) + { + if( bigBuf[j] == '#' ) + { + if( bigBuf[j+1] == '{' ) + { + break; + } + } + } + j=j+2; + if( j >= xStats.st_size ) break; + for( ; bigBuf[j] == ' ' || bigBuf[j] == '\t'; j++ ); + for( lSize = 0; lSize+j < xStats.st_size && bigBuf[lSize+j] != '\n' && bigBuf[lSize+j] != '\r'; lSize++ ); + xCont.name = new char[lSize+1]; + memset( xCont.name, 0, lSize+1 ); + memcpy( xCont.name, &bigBuf[j], lSize ); + j += lSize+1; + + for( lSize = 0; lSize+j < xStats.st_size; lSize++ ) + { + if( bigBuf[lSize+j] == '#' ) + { + if( bigBuf[lSize+j+1] == '}' ) + { + break; + } + } + } + xCont.value = new char[lSize+1]; + memset( xCont.value, 0, lSize+1 ); + memcpy( xCont.value, &bigBuf[j], lSize ); + + aContent.insert( xCont.name, xCont.value ); + + j += lSize + 2; + } + + /* Every name and value was copied out above, so the file image can go. */ + delete[] bigBuf; +} + +/** + * Print a Set-Cookie header line for name=value, followed by the expires, + * path and domain attributes that are not NULL and by "secure" when asked. + */ +void Cgi::writeCookie( char const *name, char const *value, char const *expires, char const *path, char const *domain, bool secure ) +{ + printf("Set-Cookie: %s=%s", name, value ); + + if( expires != NULL ) + { + printf("; expires=%s", expires ); + } + + if( path != NULL ) + { + printf("; path=%s", path ); + } + + if( domain != NULL ) + { + printf("; domain=%s", domain ); + } + + if( secure ) + { + printf("; secure"); + } + + printf("\n"); +} diff --git a/src/cgi.h b/src/cgi.h new file mode 100644 index 0000000..8e9a584 --- /dev/null +++ b/src/cgi.h @@ -0,0 +1,196 @@ +/**\file cgi.h + * Describes extra params needed to use the Cgi class as well as the class + * itself.
+ *@author Mike Buland + */ + +/* This header has no include guard of its own; without this line a second + * inclusion would redefine class Cgi. */ +#pragma once + +#include "linkedlist.h" +#include "hashtable.h" +#include "hashfunctionstring.h" + +#define VAR_STDINPUT 0x01 /**< Variable came from stdinput, web form */ +#define VAR_COOKIE 0x02 /**< Variable came from a cookie */ +#define VAR_CMDLINE 0x04 /**< Variable came from commandline / uri */ +#define VAR_ANY 0xFF /**< Mask including all other types */ + +/** + * Cgi header processor originally designed for apache cgi programs. It is + * meant to be used from apache with what I believe is some sort of standard + * set of command line parameters and environment variables. This always + * worked for all of my purposes. This class will automatically extract all + * data from the system that you need and places it into tables and things + * for easy access. + * There are three types of input that data can come from, StandardInput, + * CommandLine, and Cookies. StandardInput is when you get formdata in + * multi-part forms, Cookies should usually be cookies that you set, and + * command line is everything after the question mark in the URL. + * This also contains some simple helpers for putting templated data into the + * HTTP data feed. + *@author Mike Buland + */ +class Cgi +{ +public: + /** + * Create a complete CGI object, this object will automatically read data + * from all available sources and be ready for use on the very next line! + * If strSource is filled in it will also automatically read in a content + * file, which is a simple file format containing named blocks of reusable + * templates. + *@param strSource Set to a filename in order to load up a content file. + */ + Cgi( const char *strSource = NULL ); + + /** + * Destroy the cgi object. + */ + ~Cgi( ); + + /** + * Gets the value for a variable as a character string. The name is the + * name that was given on the URL or in the form or cookie. Skip can be + * set to any value above zero to retrieve subsequent variables with the + * same name.
The most obvious use of this is when dealing with file + * uploads, each file upload sends you three variables with the same name + * and different content. Finally the variable type determines where you + * will accept this variable from. This is generally a bit of a security + * thing, if you store login info in a cookie and don't want people getting + * in by faking the appropriate URL. + *@param name The name of the variable you wish to retrieve. + *@param skip The number of variables with the given name to skip before + * returning something meaningful. The only way to determine how many + * variables with the same name there are is to skip until you get a NULL + * value returned. + *@param type Can be set to any combination of VAR_STDINPUT, VAR_COOKIE, + * VAR_CMDLINE, or just VAR_ANY. This takes bitflags, so you can or the + * values together. If a variable is found but came from the wrong source + * it won't match any other criteria and will be treated as though it + * doesn't exist. + *@returns A null-terminated string representing the value of the requested + * variable, or NULL if the variable did not exist. If a variable does + * exist but has no value the string returned will start with a NUL char, + * but be a valid string. + */ + char *getVarValue( const char *name, int skip=0, unsigned char type=VAR_ANY ); + + /** + * This functions identically in every way to getVarValue, except that + * instead of returning a pointer to the variable's value, it returns the + * length of the variable's value string. The params are the same and so + * a call to both functions with the same params should yield a value and + * a corresponding length. + *@param name The name of the variable you wish to retrieve. + *@param skip The number of variables with the given name to skip before + * returning something meaningful. The only way to determine how many + * variables with the same name there are is to skip until you get -1 + * returned.
+ *@param type Can be set to any combination of VAR_STDINPUT, VAR_COOKIE, + * VAR_CMDLINE, or just VAR_ANY. This takes bitflags, so you can or the + * values together. If a variable is found but came from the wrong source + * it won't match any other criteria and will be treated as though it + * doesn't exist. + *@returns The length of the value-string of the requested variable. If + * the requested variable is not found, -1 is returned. + */ + int getVarLength( const char *name, int skip=0, unsigned char type=VAR_ANY ); + + /** + * A handy little function that writes a load of debug info related to + * parsing CGI params to the standard output in html. This is generally + * best used at the end of a page. + */ + void writeDebugInfo(); + + /** + * Write a content header to the standard output. This should also be the + * first thing that you do (except for writing cookies) after initializing + * the Cgi class. You can select a type of header or content from the + * header enum, and a properly formatted header will show up on the + * standard output. + *@param type Any value from the header enum in this class. The default is + * to write an html header, probably the most common as well. + */ + void writeContentHeader( int type=headerHTML ); + + /** + * Write content to the standard output. The content variable should have + * been loaded during construction of the Cgi object or with the + * loadContent function. The content variable should be formatted just like + * a printf string, so that anything you want to put into it will have a % + * symbol replacement code, like %s, %d, etc. Since this actually uses a + * type of printf function everything from those docs works here. + *@param name The name of the content variable to format and write to + * standard output. + *@param ... As many params as you want to include, ala printf. + */ + void writeContent( const char *name, ...); + + /** + * Load a content file.
I don't want to describe the format here, you can + * just read the code or find an example for now. Sorry. + *@param strSource The name of the file to open and read in to get the + * content loaded. + */ + void loadContent( const char *strSource = NULL ); + + /** + * Write a cookie-set header to the output stream. This should be done + * before any other content-headers are written. The specifics of this + * function are very simple, since I rely on the user's understanding of + * how standard HTTP/1.1 or HTTP/1.0 cookie syntax works. If you don't + * care then just use the name and value and the defaults should keep you + * in good stead for a long time. + *@param name The name of the cookie variable to set. + *@param value The value to set to that variable. + *@param expires The formatted string value for the date and time this + * cookie should expire. A NULL here will put an "until the browser closes" + * tag in. + *@param path The path (URL) that this cookie belongs to. If you run a lot + * of hosted servers or sub-sites that may have some shared URL bits then + * you may want to set this. The cookie should only be sent to URLs that + * match this as their first part. + *@param domain The domain that is allowed to read this, if not set, it's + * the domain the web browser contacted when they got the cookie. + *@param secure Presumably maps to the standard cookie "secure" flag, which + * asks the browser to return the cookie only over encrypted connections + * (TODO: confirm against the writeCookie implementation). + */ + void writeCookie( char const *name, char const *value, char const *expires=NULL, char const *path=NULL, char const *domain=NULL, bool secure=false ); + + /** + * A simple helper class to contain variable data. + */ + class Item + { + public: + /** + * Build an empty Item. + */ + Item( ) + { + name = NULL; + value = NULL; + len = 0; + type = 0; + } + /** The name of the item. */ + char *name; + /** The value of the item. 
*/ + char *value; + /** The length of the item's value. */ + unsigned long len; + /** The type of the item (where it came from). */ + unsigned char type; + }; + + /** Header values */ + enum + { + headerHTML + }; + +private: + /** Keeps track of all contained variables. */ + LinkedList aVars; + /** Keeps track of all content variables. */ + HashTable aContent; +}; diff --git a/src/connection.cpp b/src/connection.cpp new file mode 100644 index 0000000..a277ea7 --- /dev/null +++ b/src/connection.cpp @@ -0,0 +1,432 @@ +#include "connection.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +Connection::Connection() +{ + nSocket = -1; + bActive = false; + bDisconnectMe = false; + pProtocol = NULL; +} + +Connection::~Connection() +{ + if( pProtocol != NULL ) delete pProtocol; +} + +bool Connection::appendOutput( const char *lpOutput, int nSize ) +{ + return xOutputBuf.appendData( lpOutput, nSize ); +} + +bool Connection::appendOutput( const char lOutput ) +{ + return xOutputBuf.appendData( lOutput ); +} + +bool Connection::appendOutput( const short lOutput ) +{ + return xOutputBuf.appendData( lOutput ); +} + +bool Connection::appendOutput( const int lOutput ) +{ + return xOutputBuf.appendData( lOutput ); +} + +bool Connection::appendOutput( const long lOutput ) +{ + return xOutputBuf.appendData( lOutput ); +} + +bool Connection::appendOutput( const float lOutput ) +{ + return xOutputBuf.appendData( lOutput ); +} + +bool Connection::appendOutput( const double lOutput ) +{ + return xOutputBuf.appendData( lOutput ); +} + +bool Connection::appendOutput( const unsigned char lOutput ) +{ + return xOutputBuf.appendData( lOutput ); +} + +bool Connection::appendOutput( const unsigned short lOutput ) +{ + return xOutputBuf.appendData( lOutput ); +} + +bool Connection::appendOutput( const unsigned long lOutput ) +{ + return xOutputBuf.appendData( lOutput ); +} + +bool Connection::appendOutput( const unsigned int lOutput ) +{ + 
return xOutputBuf.appendData( lOutput ); +} + +bool Connection::appendInput( const char *lpInput, int nSize ) +{ + return xInputBuf.appendData( lpInput, nSize ); +} + +int Connection::scanInputFor( char cTarget ) +{ + const char *lpTmp = xInputBuf.getData(); + int jMax = xInputBuf.getLength(); + + for( int j = 0; j < jMax; j++ ) + { + if( lpTmp[j] == cTarget ) + { + return j; + } + } + + return -1; +} + +const char *Connection::getOutput() +{ + return xOutputBuf.getData(); +} + +const char *Connection::getInput() +{ + return xInputBuf.getData(); +} + +void Connection::setSocket( int nNewSocket ) +{ + nSocket = nNewSocket; +} + +int Connection::getSocket() +{ + return nSocket; +} + +bool Connection::isActive() +{ + return bActive; +} + +void Connection::close() +{ + if( bActive ) + { + fsync( nSocket ); + ::close( nSocket ); + } + bActive = false; + //nSocket = -1; + xInputBuf.clearData(); + xOutputBuf.clearData(); + if( pProtocol != NULL ) + { + delete pProtocol; + pProtocol = NULL; + } +} + +bool Connection::open( int nNewSocket ) +{ + bActive = true; + setSocket( nNewSocket ); + bDisconnectMe = false; + + return true; +} + +bool Connection::open( const char *sAddr, int nPort ) +{ + struct sockaddr_in xServerName; + bActive = false; + + /* Create the socket. */ + nSocket = socket( PF_INET, SOCK_STREAM, 0 ); + if( nSocket < 0 ) + { + bActive = false; + return false; + } + + /* Connect to the server. 
*/ + { + struct hostent *hostinfo; + + xServerName.sin_family = AF_INET; + xServerName.sin_port = htons( nPort ); + hostinfo = gethostbyname( sAddr ); + if (hostinfo == NULL) + { + return false; + } + xServerName.sin_addr = *(struct in_addr *) hostinfo->h_addr; + } + + int ret = connect( + nSocket, + (struct sockaddr *)&xServerName, + sizeof(xServerName) + ); + + if( ret < 0 ) + { + return false; + } + + bActive = true; + bDisconnectMe = false; + + return true; +} + +bool Connection::readInput() +{ + char buffer[2048]; + int nbytes; + int nTotalRead=0; + + for(;;) + { + memset( buffer, 0, 2048 ); + + nbytes = read( nSocket, buffer, 2048 ); + if (nbytes < 0) + { + /* Read error. */ + //perror("readInput"); + return false; + } + else if (nbytes == 0) + { + /* End-of-file. */ + //perror("readInput"); + return false; + } + else + { + nTotalRead += nbytes; + appendInput( buffer, nbytes ); + /* Data read. */ + if( nbytes < 2047 ) + { + if( pProtocol != NULL && nTotalRead > 0 ) + { + pProtocol->onNewData(); + } + + return true; + } + } + } + + return true; +} + +bool Connection::readInput( int nSec, int nUSec ) +{ + fd_set rfds; + struct timeval tv; + int retval; + + /* Watch stdin (fd 0) to see when it has input. */ + FD_ZERO(&rfds); + FD_SET(nSocket, &rfds); + /* Wait up to five seconds. */ + tv.tv_sec = nSec; + tv.tv_usec = nUSec; + + retval = select( nSocket+1, &rfds, NULL, NULL, &tv ); + /* Don't rely on the value of tv now! */ + + if (retval == -1) + { + // Oh my god!!! some kind of horrible problem!!!! + return false; + } + else if( retval ) + { + // None of them have data, but the connection is still active. 
+ return readInput(); + } + else + { + return true; + } +} + +bool Connection::clearOutput() +{ + return xOutputBuf.clearData(); +} + +bool Connection::clearInput() +{ + return xInputBuf.clearData(); +} + +#define min( a, b ) ((asetConnection( this ); +} + +int Connection::getInputAmnt() +{ + return xInputBuf.getLength(); +} + +int Connection::getOutputAmnt() +{ + return xOutputBuf.getLength(); +} + +class Protocol *Connection::getProtocol() +{ + return pProtocol; +} + +void Connection::printInputDebug( const char *lpPrefix, FILE *fh, int nBytesMax ) +{ + printDataDebug( + (const unsigned char *)xInputBuf.getData(), + xInputBuf.getLength(), + "input", + lpPrefix, + fh, + nBytesMax + ); +} + +void Connection::printOutputDebug( const char *lpPrefix, FILE *fh, int nBytesMax ) +{ + printDataDebug( + (const unsigned char *)xOutputBuf.getData(), + xOutputBuf.getLength(), + "output", + lpPrefix, + fh, + nBytesMax + ); +} + +void Connection::printDataDebug( const unsigned char *pData, long nDataLen, const char *lpName, const char *lpPrefix, FILE *fh, int nBytesMax ) +{ + if( nBytesMax > 0 ) + { + nDataLen = (nBytesMax32 && pData[j+k]<=128)?(pData[j+k]):('.') ); + } + fprintf( fh, "\n"); + j += kmax; + if( j >= nDataLen ) break; + } + fprintf( fh, lpPrefix ); + for( int l = 0; l < 8*3+2*8+2; l++ ) fprintf( fh, (l!=8*3)?("-"):("+") ); fprintf( fh, "\n"); +} + diff --git a/src/connection.h b/src/connection.h new file mode 100644 index 0000000..efb8630 --- /dev/null +++ b/src/connection.h @@ -0,0 +1,387 @@ +/**\file + * Contains the Connection class. + *@author Mike Buland + */ + +#ifndef CONNECTION_H +#define CONNECTION_H + +#include "multilog.h" +#include "flexbuf.h" +#include "protocol.h" + +/** Represents a single connection on a network. While these connections + * may be treated more or less just like files, occasionally problems arise + * when writing data at any time you feel like. 
Therefore you run all your + * data through a Connection, which buffers all data and makes sure no + * buffers are exceeded and nothing inappropriate for the recipient of the + * data is sent. + *@author Mike Buland + */ +class Connection +{ +public: + /** + * Construct a blank and non-connected Connection. The created object is + * not yet connected to anything, and most of the functions except open are + * unusable. + */ + Connection(); + + /** + * Destroy the connection and delete the attached protocol object, if any. + * Note that the destructor does not close the contained socket itself; + * call close() first if the connection is still open. This does not send + * out pending data, especially since such an operation could take + * considerable time, depending on the pending data and state of the + * receiving end. + */ + ~Connection(); + + /** + * Open a connection to a remote server. This sets up this connection as + * a client instead of a server and does all of the work that needs to be + * done to actually open an AF_INET connection, which is a lot of work. + *@param sAddr The address to connect to. This can be in any format + * normally understood by your system to be an address, ip, domain name, + * etc. + *@param nPort The port number to connect to on the remote server. + *@returns True if the connection was successful and everything is set up, + * false if there were any of a dozen errors and the connection is not set. + *@todo Make this function add log entries to a standard MultiLog if + * something goes wrong. + */ + bool open( const char *sAddr, int nPort ); + + /** Append the given data to the output. The data is presumed to be null + * terminated. To put binary data into the stream, use the other + * appendOutput function. This should be the only method used to + * communicate with the socket. + *@param lpOutput The data to add to the output queue. + *@param nSize How much data is in the lpOutput buffer. If this value + * is -1 then the program treats lpOutput as a null-terminated string. + *@returns True if everything is ok, false otherwise. 
+ */ + bool appendOutput( const char *lpOutput, int nSize=-1 ); + + /** + * Append the character to the output. + *@param lOutput The character to add to the output queue. + *@returns True if everything is ok, false otherwise. + */ + bool appendOutput( const char lOutput ); + + /** + * Append the short to the output. + *@param lOutput The short to add to the output queue. + *@returns True if everything is ok, false otherwise. + */ + bool appendOutput( const short lOutput ); + + /** + * Append the int to the output. + *@param lOutput The int to add to the output queue. + *@returns True if everything is ok, false otherwise. + */ + bool appendOutput( const int lOutput ); + + /** + * Append the long to the output. + *@param lOutput The long to add to the output queue. + *@returns True if everything is ok, false otherwise. + */ + bool appendOutput( const long lOutput ); + + /** + * Append the float to the output. + *@param lOutput The float to add to the output queue. + *@returns True if everything is ok, false otherwise. + */ + bool appendOutput( const float lOutput ); + + /** + * Append the double to the output. + *@param lOutput The double to add to the output queue. + *@returns True if everything is ok, false otherwise. + */ + bool appendOutput( const double lOutput ); + + /** + * Append the unsigned char to the output. + *@param lOutput The unsigned char to add to the output queue. + *@returns True if everything is ok, false otherwise. + */ + bool appendOutput( const unsigned char lOutput ); + + /** + * Append the unsigned short to the output. + *@param lOutput The unsigned short to add to the output queue. + *@returns True if everything is ok, false otherwise. + */ + bool appendOutput( const unsigned short lOutput ); + + /** + * Append the unsigned int to the output. + *@param lOutput The unsigned int to add to the output queue. + *@returns True if everything is ok, false otherwise. 
+ */ + bool appendOutput( const unsigned int lOutput ); + + /** + * Append the unsigned long to the output. + *@param lOutput The unsigned long to add to the output queue. + *@returns True if everything is ok, false otherwise. + */ + bool appendOutput( const unsigned long lOutput ); + + /** + * Writes all input data in the buffer in a dual-view ascii and hex display + * to a file. There are a number of options that also help with debugging. + *@param lpPrefix Text to be added to the begining of every line written + * out. The default is a blank string. + *@param fh The file to write the data to in text mode. This is stdout by + * default, but could be any already open file handle. + *@param nBytesMax The maximum number of bytes to write to the output. The + * amount of data can be overwhelming sometimes, so you can limit it. The + * default value is -1, which is also unlimited. + */ + void printInputDebug( const char *lpPrefix="", FILE *fh=stdout, int nBytesMax=-1 ); + + /** + * Writes all output data in the buffer in a dual-view ascii and hex display + * to a file. There are a number of options that also help with debugging. + *@param lpPrefix Text to be added to the begining of every line written + * out. The default is a blank string. + *@param fh The file to write the data to in text mode. This is stdout by + * default, but could be any already open file handle. + *@param nBytesMax The maximum number of bytes to write to the output. The + * amount of data can be overwhelming sometimes, so you can limit it. The + * default value is -1, which is also unlimited. + */ + void printOutputDebug( const char *lpPrefix="", FILE *fh=stdout, int nBytesMax=-1 ); + + /** + * This is the low-level generic function that is called by both + * printInputDebug and printOutputDebug. It works effectively just like + * both of them, except that you can give it a raw pointer to the data to + * print out. 
This probably doesn't belong in this class, but this was + * where I was when I needed it. + *@param pData A pointer to the data to write. This is not treated as a + * null terminated string, so make sure that the nDataLen param is set + * properly. + *@param nDataLen The number of bytes that are in pData and that you want to + * see. + *@param lpName The name of the data, this is used in the header where it + * says "Displaying nnn bytes of ." A good example would be input + * or output. + *@param lpPrefix Text to put before every line output. This just makes it + * easier to tell large blocks apart in the output. + *@param fh The file handle to write all data to. + *@param nBytesMax The maximum number of bytes. This parameter is stupid. + * If it is set to -1, then nDataLen is used, otherwise the smaller value is + * used as the number of bytes to output. + *@todo Put this function somewhere more deserving. + *@todo Remove the nBytesMax param, we need that in the other functions, + * not this one! + */ + void printDataDebug( const unsigned char *pData, long nDataLen, const char *lpName, const char *lpPrefix, FILE *fh, int nBytesMax ); + + /** Append the given data to the input. The data is presumed to be null + * terminated. To put binary data into the stream, use the other + * appendInput function. This is mainly used by internal routines. + *@param lpInput The data to add to the input queue. + *@param nSize How much data is in the lpInput buffer. If this value + * is -1 then the program treats lpOutput as a null-terminated string. + *@returns True if everything is ok, false otherwise. + */ + bool appendInput( const char *lpInput, int nSize=-1 ); + + /** Searches through the current pending input for a certain character. + * This is useful for finding out where exactly the end of a line is, for + * example, to see if a command has been entered yet. + *@param cTarget The character to search for. 
+ *@returns The position of the target relative to the beginning of the input + * or -1 if the target wasn't found. + */ + int scanInputFor( char cTarget ); + + /** Gets a pointer to the output buffer. This is mainly used by internal + * routines, and is cleared every cycle when data is sent out again. + *@returns A pointer to the buffer holding the pending output data. + */ + const char *getOutput(); + + /** Gets a pointer to the start of the input buffer's active data + * section. Use this to gain access to the input you need to do + * your job. + *@returns A pointer to the data in the input buffer. Do not delete this. + */ + const char *getInput(); + + /** Clears all pending output, this is mainly just used internally. + *@returns True if operation was a success, otherwise false. + */ + bool clearOutput(); + + /** Clears all pending input, whether it's been used or not. Please + * refrain from calling this during normal operation, use usedInput + * instead, it's much safer. + *@returns True if the operation was a success, false otherwise. + */ + bool clearInput(); + + /** Sets the socket that should be used internally. + *@param nNewSocket The new socket to work with. + */ + void setSocket( int nNewSocket ); + + /** Gets the handle (number) of the working socket. This can be a + * dangerous function to call, please refrain from calling it directly + * if any alternative can be found. + *@returns The number of the working socket. + */ + int getSocket(); + + /** Determines if the connection is still active. + *@returns True if the connection is active, false otherwise. + */ + bool isActive(); + + /** Clears all buffers, deletes the attached protocol object and sets up + * the connection to be reused. If the connection is still active the + * socket itself is also closed here. + */ + void close(); + + /** Opens a socket. Really just sets up the connection for use since + * the socket itself was created and opened by the ConnectionManager. + * This also calls setSocket so you don't have to. 
+ *@param nNewSocket The socket to associate with. + *@returns Always true. + */ + bool open( int nNewSocket ); + + /** + * Reads all pending input from the connection. If this is called outside + * of the ConnectionManager it will usually block indefinitely waiting for + * new data. The only way to change this behaviour is to modify the socket + * low-level when you connect it manually, or, preferably use the other + * readInput function to control blocking time. + *@returns True if the socket is still connected, otherwise false. + */ + bool readInput(); + + /** + * Reads all pending input from the connection, blocking up to nSec + * seconds and nUSec micro-seconds for the data. This uses select to + * simulate blocking, but has the same effect as standard io blocking. + * If you don't want to block, just set both values to zero. + *@param nSec Max seconds to wait. + *@param nUSec Max micro-seconds to wait. + *@returns False if select fails or the subsequent read reports a closed + * or failed socket, true if data was read or the wait simply timed out. + */ + bool readInput( int nSec, int nUSec ); + + /** Writes all data that is pending to the socket. + *@returns True if all data was written successfully, false otherwise. + */ + bool writeOutput(); + + /** Determines if the connection has output waiting to go out. + *@returns true if there is pending output, otherwise false. + */ + bool hasOutput(); + + /** Sets internal flags so that this connection will be closed and + * recycled next time through the ConnectionManager. + */ + void disconnect(); + + /** Determines if this connection is ready to be disconnected or not. + *@returns True if it is time to disconnect, false if it isn't. + */ + bool needDisconnect(); + + /** Tells the caller if there is pending input waiting to be processed. + *@returns True if there is pending input that has not been used, returns + * false if there isn't. + */ + bool hasInput(); + + /** Removes bytes from the beginning of the input queue. Use this after + * getting the input and processing as much as you need to. + *@param nAmount The number of bytes used. + *@returns true if the update was successful, otherwise false. 
+ */ + bool usedInput( int nAmount ); + + /** Sets the protocol to be used by this connection. All data in and out + * passes through the protocol object, which may process that data to + * filter out and process any special messages that may have been + * included. Everything that isn't processed can be accessed in the + * standard method. The connection takes ownership of the object: it is + * deleted when the connection is closed or destroyed. + *@param pNewProtocol A pointer to a protocol object that you want to + * use. + */ + void setProtocol( class Protocol *pNewProtocol ); + + /** Gets the number of bytes that are waiting in the input queue, the data + * that has yet to be processed. + *@returns The number of bytes in the input queue. + */ + int getInputAmnt(); + + /** Gets the number of bytes that are waiting in the output queue, the data + * that has yet to be sent to the connected socket. + *@returns The number of bytes in the output queue. + */ + int getOutputAmnt(); + + /** Gets a pointer to the protocol that is attached to this connection + * object. This is useful to set modes, and send special commands in + * addition to the standard raw data reads and writes that are normally + * permitted. In fact, in everything besides a raw telnet protocol all + * data should be sent through the protocol and not the connection object. + *@returns A pointer to the Protocol associated with this connection. + */ + class Protocol *getProtocol(); + +private: + /** + * A buffer to keep data read from the socket in. This is filled in by + * the function readInput, which is automatically called by the + * ConnectionManager whenever new data is ready. + */ + FlexBuf xInputBuf; + + /** + * A buffer to keep data that should be sent to the socket. This is filled + * in by using the appendOutput functions and is sent to the socket using + * the writeOutput function, which is automatically called every cycle by + * the ConnectionManager when there is pending data. + */ + FlexBuf xOutputBuf; + + /** + * The socket that the user is connected to. 
This is not the same as the + * socket number of the listening socket, this is the unique socket on the + * system that the data is coming to. + */ + int nSocket; + + /** + * True=active connection, False=connection lost + */ + bool bActive; + + /** + * True=disconnect next cycle (after data is transmitted), Flse=keep going. + */ + bool bDisconnectMe; + + /** + * A pointer to a protocol handler that can automatically process the data + * in the buffers. This is optional if you use the connections on your own + * but reccomended if you use this with the rest of the ConnectionManager + * system. + */ + class Protocol *pProtocol; +}; + +#endif diff --git a/src/connectionmanager.cpp b/src/connectionmanager.cpp new file mode 100644 index 0000000..36ff961 --- /dev/null +++ b/src/connectionmanager.cpp @@ -0,0 +1,343 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "connectionmanager.h" +#include + +ConnectionManager::ConnectionManager() +{ + pLog = MultiLog::getLog(); + nMasterSocket = -1; + pMonitor = NULL; +} + +ConnectionManager::~ConnectionManager() +{ + std::list::const_iterator i; + for( i = lActive.begin(); i != lActive.end(); i++ ) + { + delete (*i); + } + for( i = lInactive.begin(); i != lInactive.end(); i++ ) + { + delete (*i); + } +} + +bool ConnectionManager::startServer( int nPort, int nInitPool ) +{ + /* Create the socket and set it up to accept connections. */ + struct sockaddr_in name; + + /* Create the socket. */ + nMasterSocket = socket (PF_INET, SOCK_STREAM, 0); + if (nMasterSocket < 0) + { + pLog->LineLog( MultiLog::LError, "Couldn't create a listen socket."); + return false; + } + + /* Give the socket a name. 
*/ + name.sin_family = AF_INET; + name.sin_port = htons( nPort ); + + // I think this specifies who we will accept connections from, + // a good thing to make configurable later on + name.sin_addr.s_addr = htonl( INADDR_ANY ); + + int opt = 1; + setsockopt( nMasterSocket, SOL_SOCKET, SO_REUSEADDR, (char *)&opt, sizeof(opt)); + + if (bind (nMasterSocket, (struct sockaddr *) &name, sizeof (name)) < 0) + { + pLog->LineLog( MultiLog::LError, "Couldn't bind to the listen socket."); + return false; + } + + if (listen (nMasterSocket, 1) < 0) + { + pLog->LineLog( MultiLog::LError, "Couldn't begin listening to the server socket."); + return false; + } + + /* Initialize the set of active sockets. */ + FD_ZERO (&fdActive); + FD_ZERO (&fdRead); + FD_ZERO (&fdWrite); + FD_ZERO (&fdException); + FD_SET (nMasterSocket, &fdActive); + + for( int j = 0; j < nInitPool; j++ ) + { + lInactive.insert( lInactive.begin(), new Connection() ); + } + + return true; +} + +bool ConnectionManager::startServer( int nPort, int nInitPool, int nNumTries, int nTimeout ) +{ + struct timeval xTimeout; + + for( int j = 0; j < nNumTries; j++ ) + { + pLog->LineLog( MultiLog::LStatus, "Attempting to create server socket (attempt [%d/%d])...", j+1, nNumTries ); + if( startServer( nPort, nInitPool ) == true ) + { + return true; + } + else if( j < nNumTries-1 ) + { + pLog->LineLog( MultiLog::LStatus, "Waiting for %d secconds to allow port to clear...", nTimeout ); + xTimeout.tv_sec = nTimeout; + xTimeout.tv_usec = 0; + if (select(0, (fd_set *) 0, (fd_set *) 0, (fd_set *) 0, &xTimeout) < 0) { + pLog->LineLog( MultiLog::LError, "Error using select to sleep for a while."); + } + usleep( nTimeout ); + } + } + + return false; +} + +bool ConnectionManager::scanConnections( int nTimeout, bool bForceTimeout ) +{ + struct timeval xTimeout; + + xTimeout.tv_sec = nTimeout / 1000000; + xTimeout.tv_usec = nTimeout % 1000000; + + /* Block until input arrives on one or more active sockets. 
*/ + fdRead = fdActive; + fdWrite = fdActive; + fdException = fdActive; + + // We removed the write checking because it just checks to see if you *can* + // write...that's stupid, they're all open, so it always exits immediately + // if there are ANY connections there... + if( TEMP_FAILURE_RETRY( select( FD_SETSIZE, &fdRead, (fd_set *)0/*&fdWrite*/, &fdException, &xTimeout ) ) < 0 ) + { + pLog->LineLog( MultiLog::LError, "Error attempting to scan open connections."); + perror("ConnectionManager"); + return false; + } + // Now we use select to sleep as well as to scan for connections, now we + // just need to fix the fact that if there are no connections, the seccond + // select call doesn't return until there is a connection... + if( bForceTimeout ) + { + if (select(0, (fd_set *) 0, (fd_set *) 0, (fd_set *) 0, &xTimeout) < 0) { + pLog->LineLog( MultiLog::LError, "Error using select to sleep for a while."); + } + } + + /* Service all the sockets with input pending. */ + for( int i = 0; i < FD_SETSIZE; ++i ) + { + if( FD_ISSET( i, &fdRead ) ) + { + if( i == nMasterSocket ) + { + addConnection(); + } + else + { + Connection *pCon = findActiveConnection( i ); + if( pCon == NULL ) + { + pLog->LineLog( MultiLog::LError, "A connection object was lost, or never created!"); + return false; + } + + /* Data arriving on an already-connected socket. */ + if( pCon->readInput() != true ) + { + pLog->LineLog( MultiLog::LStatus, "Closing connection due to disconnect."); + close( i ); + FD_CLR( i, &fdActive ); + pMonitor->onClosedConnection( pCon ); + pCon->close(); + } + else + { + // We actually read something...but the connection handles + // protocol notification, so we don't need to do anything + // here... 
+ } + } + } + } + std::list::iterator i; + for( i = lActive.begin(); i != lActive.end(); i++ ) + { + if( (*i)->isActive() == false ) + { + std::list::iterator l = i; + i--; + lInactive.insert( lInactive.end(), *l ); + lActive.erase( l ); + continue; + } + if( (*i)->hasOutput() ) + { + (*i)->writeOutput(); + } + if( (*i)->needDisconnect() ) + { + int prt = (*i)->getSocket(); + close( prt ); + FD_CLR( prt, &fdActive ); + pMonitor->onClosedConnection( *i ); + (*i)->close(); + lInactive.insert( lInactive.end(), *i ); + std::list::iterator l = i; + i--; + lActive.erase( l ); + pLog->LineLog( MultiLog::LStatus, "Closing connection due to server request."); + } + } + + return true; +} + +bool ConnectionManager::shutdownServer() +{ + while( !lActive.empty() ) + { + Connection *i = *(lActive.begin()); + if( i->isActive() ) + { + i->close(); + pMonitor->onClosedConnection( i ); + lInactive.insert( lInactive.end(), i ); + lActive.erase( lActive.begin() ); + } + } +/* + for( int i = 0; i < nPoolSize; i++ ) + { + + int prt = axConPool[i].getSocket(); + close( prt ); +// FD_CLR( prt, &fdActive ); + pMonitor->onClosedConnection( &axConPool[i] ); + axConPool[i].close(); + } +*/ + shutdown( nMasterSocket, SHUT_RDWR ); + close( nMasterSocket ); + + return true; +} + +bool ConnectionManager::broadcastMessage( const char *lpData, int nExcludeSocket ) +{ + std::list::const_iterator i; + for( i = lActive.begin(); i != lActive.end(); i++ ) + { + if( (*i)->isActive() && + (*i)->getSocket() != nExcludeSocket ) + { + (*i)->appendOutput( lpData ); + } + } + + return true; +} + +bool ConnectionManager::addConnection() +{ + struct sockaddr_in clientname; + size_t size; + int newSocket; + + size = sizeof( clientname ); +#ifdef __CYGWIN__ + newSocket = accept( nMasterSocket, (struct sockaddr *) &clientname, (int *)&size ); +#else + newSocket = accept( nMasterSocket, (struct sockaddr *) &clientname, &size ); +#endif + if( newSocket < 0 ) + { + pLog->LineLog( MultiLog::LError, "Error accepting a 
new connection!" ); + return false; + } +// char *tmpa = inet_ntoa(clientname.sin_addr); + char tmpa[20]; + inet_ntop( AF_INET, (void *)&clientname.sin_addr, tmpa, 20 ); + /* %hu rather than %hd: ports are unsigned 16-bit values, and anything above 32767 would otherwise be logged as a negative number. */ + pLog->LineLog( MultiLog::LStatus, "New connection from host %s, port %hu.", tmpa, ntohs (clientname.sin_port) ); +/* + int nCnt = 0; + for( int j = 0; j < nPoolSize; j++ ) + { + if( axConPool[j].isActive() ) + { + nCnt++; + } + } + pLog->LineLog( MultiLog::LStatus, "Connections %d/%d.", nCnt, nPoolSize ); + */ +// free( tmpa ); + FD_SET( newSocket, &fdActive ); + + //void nonblock(socket_t s) + { + int flags; + + flags = fcntl(newSocket, F_GETFL, 0); + flags |= O_NONBLOCK; + if (fcntl(newSocket, F_SETFL, flags) < 0) + { + /* Undo the registration above and release the socket; otherwise select() keeps reporting a descriptor that no Connection owns. */ + FD_CLR( newSocket, &fdActive ); + close( newSocket ); + return false; + } + } + + Connection *pCon = getInactiveConnection(); + pCon->open( newSocket ); + + /* The monitor is optional (it starts out NULL), so only notify it when one has been set. */ + if( pMonitor != NULL ) + { + pMonitor->onNewConnection( pCon ); + } + + lActive.insert( lActive.end(), pCon ); + + return true; +} + +/* + * Hands out a Connection for reuse: the first pooled inactive one if there + * is any, otherwise a freshly allocated object. + */ +Connection *ConnectionManager::getInactiveConnection() +{ + if( lInactive.empty() ) + { + return new Connection(); + } + Connection *pCon = *(lInactive.begin()); + lInactive.erase( lInactive.begin() ); + return pCon; +} + +/* + * Looks through the active list for the connection using nSocket. Returns + * NULL when no entry matches. + */ +Connection *ConnectionManager::findActiveConnection( int nSocket ) +{ + std::list<Connection *>::const_iterator i; + for( i = lActive.begin(); i != lActive.end(); i++ ) + { + if( (*i)->getSocket() == nSocket ) + { + return *i; + } + } + + return NULL; +} + +void ConnectionManager::setConnectionMonitor( ConnectionMonitor *pNewMonitor ) +{ + pMonitor = pNewMonitor; +} diff --git a/src/connectionmanager.h b/src/connectionmanager.h new file mode 100644 index 0000000..53249a7 --- /dev/null +++ b/src/connectionmanager.h @@ -0,0 +1,138 @@ +/** + *@file + * Contains the ConnectionManager. + *@author Mike Buland + */ + +#ifndef CONNECTIONMANAGER_H +#define CONNECTIONMANAGER_H + +#include "multilog.h" +#include "connection.h" +#include "connectionmonitor.h" +#include +#include + +/** Manages incoming network connections as a server. Creates and works with + * Connection objects. 
All operations are performed on TCP/IP v4 right now, + * and on a single port, although any number of connections can be handled. + *@author Mike Buland + */ +class ConnectionManager +{ +public: + /** + * Sets up the basics, like storage for the pool, and so on. This does not + * actually start a server, bind to a port, or create a connection pool. + * That's all handled by startServer(). + */ + ConnectionManager(); + + /** + * Cleans up everything, and even clears out all still-connected Connection + * objects. + */ + ~ConnectionManager(); + + /** + * Starts a server socket and binds to it, listening for new connections. + *@param nPort The port to listen on. + *@param nInitPool The size of the initial connection pool. This will + * grow automatically if necesarry. + *@returns True if the socket was bound to the port and serving was + * started. False if there was a problem connecting to the port. + */ + bool startServer( int nPort, int nInitPool ); + + /** + * This is identicle to the simpler startServer function except that it + * will automatically try to connect multiple times in case the first + * attempt or two doesn't work for some reason. Initially this was + * written to compensate for server sockets staying locked after they were + * closed for a while. + *@param nPort The port to listen on. + *@param nInitPool The size of the initial connection pool. This will + * grow automatically if necesarry. + *@param nNumTries The maximum number of times to try to connect. + *@param nTimeout The amount of time to wait in-between connection + * attempts. + *@returns True if the socket was bound to the port and serving was + * started. False if there was a problem connecting to the port. + */ + bool startServer( int nPort, int nInitPool, int nNumTries, int nTimeout ); + + /** + * Scans all open connections, halting the calling processes until data + * is received or nTimeout ms have gone by. 
While waiting for the timeout + * to complete the process is placed into an idle mode. + *@param nTimeout The number of millisecconds to wait if there is nothing + * to actually do. + *@param bForceTimeout If set to true, this will force the scanner to wait + * for the timout to complete before returning, even if there was pending + * data. + */ + bool scanConnections( int nTimeout, bool bForceTimeout ); + + /** Shutdown the server and all assosiated sockets. + *@returns True if every socket was closed without problem. + */ + bool shutdownServer(); + + /** Sends a message directly to every connected port. + *@param lpData A null-terminated string of data to send. + *@param nExcludeSocket An optional socket to exclude from the broadcast. + *@returns True if every socket that should have gotten the message did. + */ + bool broadcastMessage( const char *lpData, int nExcludeSocket=-1 ); + + /** Sets a monitor for the manager. The monitor is sent notifications + * whenever a socket is connected, disconnected, or whenever an error + * occurs. + *@param pNewMonitor A pointer to a preconstructed ConnectionMonitor + */ + void setConnectionMonitor( ConnectionMonitor *pNewMonitor ); + +private: + /** + * Take care of the work of actually accepting a connection. This will + * accept the connection, set the initial modes, and add it to the master + * list of active connections, as well as fire off any messages that need + * to be handled by anything else. + *@returns True if everything worked, False otherwise. + */ + bool addConnection(); + + /** + * Seraches the internal lists of connections for one with a specific + * socket. + *@param nSocket The socket the connection is using for communication. + * This is the unique socket and not the one that the connection was + * initially to. + *@returns NULL if no connection was found, otherwise a pointer to a live + * Connection object. 
+ */ + Connection *findActiveConnection( int nSocket ); + + /** + * Searches the connection pool for an object that isn't in use yet, and + * returns it, ready to be filled in and used. + *@returns An unused connection object ready for use. + *@todo Check this code over to insure that the pool grows appropriately + * when enough extra connections are detected. + */ + Connection *getInactiveConnection(); + + MultiLog *pLog; /**< A pointer to the active MultiLog */ + int nMasterSocket; /**< The listening or server socket. */ + fd_set fdActive; /**< The active socket set. */ + fd_set fdRead; /**< The sockets ready for a read. */ + fd_set fdWrite; /**< The sockets ready for a write. */ + fd_set fdException; /**< The sockets that have gotten errors. */ + std::list lInactive; /**< The pool of inactive Connections */ + std::list lActive; /**< The pool of active Connections */ + + /** The ConnectionMonitor to notify of new connections. */ + ConnectionMonitor *pMonitor; +}; + +#endif diff --git a/src/connectionmonitor.cpp b/src/connectionmonitor.cpp new file mode 100644 index 0000000..1b49f5d --- /dev/null +++ b/src/connectionmonitor.cpp @@ -0,0 +1,23 @@ +/*************************************************************************** + connectionmonitor.cpp - description + ------------------- + begin : Mon Sep 8 2003 + copyright : (C) 2003 by Mike Buland + email : eichlan@yf-soft.com + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include "connectionmonitor.h" + +ConnectionMonitor::ConnectionMonitor(){ +} +ConnectionMonitor::~ConnectionMonitor(){ +} diff --git a/src/connectionmonitor.h b/src/connectionmonitor.h new file mode 100644 index 0000000..b96b533 --- /dev/null +++ b/src/connectionmonitor.h @@ -0,0 +1,41 @@ +/**@file + * Describes the ConnectionMonitor class. + */ +#ifndef CONNECTIONMONITOR_H +#define CONNECTIONMONITOR_H + +#include "connection.h" + +/** Connection Monitor defines the base class of the objects that will be + * notified whenever a connection is created or destroyed. + *@author Mike Buland + */ +class ConnectionMonitor +{ +public: + /** + * This is only here for completeness. It does nothing. + */ + ConnectionMonitor(); + + /** + * This is only here for completeness. It does nothing. + */ + virtual ~ConnectionMonitor(); + + /** Receives the notification that new connection was received. + *@param pCon The connection that was created. + *@returns Should return a true value if everything is OK, a false to + * force a shutdown. + */ + virtual bool onNewConnection( Connection *pCon ) = 0; + + /** Receives the notification that a connection was closed. + *@param pCon The connection that was closed. + *@returns Should return a true value if everything is OK, a false to + * force a shutdown. 
+ */ + virtual bool onClosedConnection( Connection *pCon ) = 0; +}; + +#endif diff --git a/src/flexbuf.cpp b/src/flexbuf.cpp new file mode 100644 index 0000000..acd55a7 --- /dev/null +++ b/src/flexbuf.cpp @@ -0,0 +1,206 @@ +#include "flexbuf.h" +#include + +FlexBuf::FlexBuf() +{ + lpBuf = new char[1024]; + nLastChar = 0; + nFirstChar = 0; + nSize = 1024; + nFill = 0; + clearData(); +} + +FlexBuf::~FlexBuf() +{ + delete[] lpBuf; +} + +bool FlexBuf::appendData( const char *lpData, int nDSize ) +{ + int nStrLen; + if( nDSize < 0 ) + { + nStrLen = strlen( lpData ); + } + else + { + nStrLen = nDSize; + } + + if( nLastChar + nStrLen + 1 > nSize ) + { + if( nFill + nStrLen + 1 < nSize ) + { + memcpy( lpBuf, lpBuf+nFirstChar, nFill ); + nLastChar -= nFirstChar; + nFirstChar = 0; + } + else + { + nSize += nStrLen+1; + char *lpNewBuf = new char[nSize]; + memcpy( lpNewBuf, lpBuf+nFirstChar, nFill ); + delete[] lpBuf; + lpBuf = lpNewBuf; + nLastChar -= nFirstChar; + nFirstChar = 0; + } + } + + memcpy( &lpBuf[nLastChar], lpData, nStrLen ); + nLastChar += nStrLen; + nFill += nStrLen; + lpBuf[nLastChar] = '\0'; + + return true; +} + +bool FlexBuf::appendData( const char lData ) +{ + if( nLastChar + 2 > nSize ) + { + if( nFill+2 < nSize ) + { + memcpy( lpBuf, lpBuf+nFirstChar, nFill ); + nLastChar -= nFirstChar; + nFirstChar = 0; + } + else + { + nSize += 1024; + char *lpNewBuf = new char[nSize]; + memcpy( lpNewBuf, lpBuf+nFirstChar, nFill ); + delete[] lpBuf; + lpBuf = lpNewBuf; + nLastChar -= nFirstChar; + nFirstChar = 0; + } + } + + lpBuf[nLastChar] = lData; + nLastChar++; + nFill++; + lpBuf[nLastChar] = '\0'; + + return true; +} + +bool FlexBuf::appendData( const short lData ) +{ + return appendData( (const char *)&lData, sizeof(short) ); +} + +bool FlexBuf::appendData( const int lData ) +{ + return appendData( (const char *)&lData, sizeof(int) ); +} + +bool FlexBuf::appendData( const long lData ) +{ + return appendData( (const char *)&lData, sizeof(long) ); +} + +bool 
FlexBuf::appendData( const float lData ) +{ + return appendData( (const char *)&lData, sizeof(float) ); +} + +bool FlexBuf::appendData( const double lData ) +{ + return appendData( (const char *)&lData, sizeof(double) ); +} + +bool FlexBuf::appendData( const unsigned char lData ) +{ + return appendData( (const char)lData ); +} + +bool FlexBuf::appendData( const unsigned short lData ) +{ + return appendData( (const char *)&lData, sizeof(short) ); +} + +bool FlexBuf::appendData( const unsigned long lData ) +{ + return appendData( (const char *)&lData, sizeof(long) ); +} + +bool FlexBuf::appendData( const unsigned int lData ) +{ + return appendData( (const char *)&lData, sizeof(int) ); +} + +bool FlexBuf::clearData() +{ + nFirstChar = nLastChar = nFill = 0; + lpBuf[nLastChar] = '\0'; + + return true; +} + +const char *FlexBuf::getData() +{ + return (lpBuf+nFirstChar); +} + +int FlexBuf::getLength() +{ + return nFill; +} + +int FlexBuf::getCapacity() +{ + return nSize; +} + +bool FlexBuf::usedData( int nAmount ) +{ + // Remove from the end if negative + if( nAmount < 0 ) + { + if( nFill+nAmount < 0 ) + { + nFill = nFirstChar = nLastChar = 0; + return true; + } + nLastChar += nAmount; + nFill += nAmount; + return true; + } + if( nAmount > nFill ) + { + nAmount = nSize; +// return false; + } + + //nLastChar -= nAmount; + nFirstChar += nAmount; + nFill -= nAmount; + + if( nFill == 0 ) + { + nFirstChar = nLastChar = 0; + } + + //if( nLastChar > 0 ) + //{ + //memmove( lpBuf, &lpBuf[nAmount], nLastChar ); + //} + + return true; +} + +int FlexBuf::findChar( char cTarget ) +{ + for( int j = nFirstChar; j < nLastChar; j++ ) + { + if( lpBuf[j] == cTarget ) + { + return j; + } + } + + return -1; +} + diff --git a/src/flexbuf.h b/src/flexbuf.h new file mode 100644 index 0000000..5ce4a89 --- /dev/null +++ b/src/flexbuf.h @@ -0,0 +1,160 @@ +/**\flexbuf.h + * Describes the FlexBuf class. 
+ *@author Mike Buland + */ + +#ifndef FLEXBUF_H +#define FLEXBUF_H + +/** Stores any amount of data, but starts small, growing as necesarry. + * It is optimized to work with stream type situations, with data being + * added to the end while it is being taken from the begning. + *@todo Set this class up to auto-shrink back to a specified sized buffer each + * time it has shrunk below that for enough operations. + *@author Mike Buland + */ +class FlexBuf +{ +public: + /** + * Construct a blank FlexBuf containing about 1k of buffer space. + */ + FlexBuf(); + + /** + * Clean up the FlexBuf, delete all buffers. + */ + ~FlexBuf(); + + /** Appends a whole string of data to the buffer. The string + * must be null terminated. + *@param lpData The data to append to the buffer. + *@param nDSize The size of the data described by lpData. If this + * value is -1 lpData is treated as a null-terminated string. + *@returns True if no problems occured, false otherwise. + */ + bool appendData( const char *lpData, int nDSize=-1 ); + + /** Appends a single character to the end of the buffer. + *@param lData The character to append to the buffer. + *@returns True if no problems occured, false otherwise. + */ + bool appendData( const char lData ); + + /** + * Append the short to the buffer. + *@param lData The short to add to the buffer queue. + *@returns True if everything is ok, false otherwise. + */ + bool appendData( const short lData ); + + /** + * Append the int to the buffer. + *@param lData The int to add to the buffer queue. + *@returns True if everything is ok, false otherwise. + */ + bool appendData( const int lData ); + + /** + * Append the long to the buffer. + *@param lData The long to add to the buffer queue. + *@returns True if everything is ok, false otherwise. + */ + bool appendData( const long lData ); + + /** + * Append the float to the buffer. + *@param lData The float to add to the buffer queue. + *@returns True if everything is ok, false otherwise. 
+ */ + bool appendData( const float lData ); + + /** + * Append the double to the buffer. + *@param lData The double to add to the buffer queue. + *@returns True if everything is ok, false otherwise. + */ + bool appendData( const double lData ); + + /** + * Append the unsigned char to the buffer. + *@param lData The unsigned char to add to the buffer queue. + *@returns True if everything is ok, false otherwise. + */ + bool appendData( const unsigned char lData ); + + /** + * Append the unsigned short to the buffer. + *@param lData The unsigned short to add to the buffer queue. + *@returns True if everything is ok, false otherwise. + */ + bool appendData( const unsigned short lData ); + + /** + * Append the unsigned int to the buffer. + *@param lData The unsigned int to add to the buffer queue. + *@returns True if everything is ok, false otherwise. + */ + bool appendData( const unsigned int lData ); + + /** + * Append the unsigned long to the buffer. + *@param lData The unsigned long to add to the buffer queue. + *@returns True if everything is ok, false otherwise. + */ + bool appendData( const unsigned long lData ); + + /** Removes all pending data from the buffer. + *@returns True if no problems occured, false otherwise. + */ + bool clearData(); + + /** Gets a pointer to the internal buffer, at the begining of the current + * data stream. + *@returns A pointer to the internal data buffer. + */ + const char *getData(); + + /** Gets the length of the current buffer (how much data is really in the + * buffer, not it's current capacity, for that check getCapacity) + *@returns The length of the current buffer. + */ + int getLength(); + + /** Gets the current capacity of the FlexBuf. If the size nears this value + * then the entire buffer is resized to accomidate more data. + *@returns The current capacity of the FlexBuf. + */ + int getCapacity(); + + /** + * Removes nAmount bytes from the begning of the buffer. 
Actually, if + * nAmount happens to be negative it will remove tha absolute value of + * nValue bytes from the end of the buffer, like the old delData command. + *@param nAmount The number of bytes used. + *@returns True if everything was successful, false if there was an error. + */ + bool usedData( int nAmount ); + + /** Finds the first instance of the given character in the buffer and + * returns an index to it. + *@param cTarget The character you're looking for. + *@returns The index of the first instance of the given character, or + * -1 if it just wasn't found. + */ + int findChar( char cTarget ); + +private: + /** The raw storage location of the FlexBuf. */ + char *lpBuf; + /** The real size of the FlexBuf. */ + int nSize; + /** Where the last char is. */ + int nLastChar; + /** Where the first char is. */ + int nFirstChar; + /** The amount of real data in the FlexBuf. This is effectively nLastChar-nFirstChar. */ + int nFill; +}; + +#endif diff --git a/src/hashfunction.cpp b/src/hashfunction.cpp new file mode 100644 index 0000000..51f2259 --- /dev/null +++ b/src/hashfunction.cpp @@ -0,0 +1,10 @@ +#include "hashfunction.h" + +HashFunction::HashFunction() +{ +} + +HashFunction::~HashFunction() +{ +} + diff --git a/src/hashfunction.h b/src/hashfunction.h new file mode 100644 index 0000000..cbcf70f --- /dev/null +++ b/src/hashfunction.h @@ -0,0 +1,48 @@ +#ifndef HASH_FUNCTION +#define HASH_FUNCTION + +/** This represents the shell of a hash function. It must be aggregated in + * order to be used. Please read about it's two functions for specificatins + * relating to what values will be passed to them and what they should return + * for creating your own hash functions. + *@author Mike Buland. + */ +class HashFunction +{ +public: + /** + * Standard Constructor. + */ + HashFunction(); + + /** + * Standard Deconstructor. + */ + virtual ~HashFunction(); + + /** Hashes the value represnted by id. 
This must return a fairly unique + * number in the range of 0-2^32 (or whatever the size of an unsigned long + * is on your system) based on the id given. The faster the number changes + * the better in a general sence. The return value will be the index + * (after probing takes place) to the data assosiated with an id, so this + * function should always produce the same number for any given id. + *@param id The identifier to use to create a unique numerical identifier. + *@returns A mostly unique numerical identifier generated using the given + * id. + */ + virtual unsigned long int hash( const void *id ) = 0; + + /** This function must compare two ids in the format that this hashfunction + * accepts. For example, if the hash function hashes strings it should + * probably { return strcmp( id1, id2 ) == 0 }. + *@param id1 One value to use in the comparison + *@param id2 Another value to use in the comparison + *@returns True if the two values match, otherwise false. + */ + virtual bool cmpIDs( const void *id1, const void *id2 ) = 0; + +// virtual void *createPersistantID( const void *id ) = 0; +// virtual void destroyPersistantID( const void *id ) = 0; +}; + +#endif diff --git a/src/hashfunctioncasestring.cpp b/src/hashfunctioncasestring.cpp new file mode 100644 index 0000000..6361f45 --- /dev/null +++ b/src/hashfunctioncasestring.cpp @@ -0,0 +1,39 @@ +#include +#include +#include +#include "hashfunctioncasestring.h" + +HashFunctionCaseString::HashFunctionCaseString() +{ +} + +HashFunctionCaseString::~HashFunctionCaseString() +{ +} + +unsigned long int HashFunctionCaseString::hash( const void *id ) +{ + const char *str = (const char *)id; + unsigned long int nPos = 0; + for( int j = 0; str[j] != '\0'; j++ ) + { + nPos = tolower(str[j]) + (nPos << 6) + (nPos << 16) - nPos; +// nPos += nPos<<16|(((unsigned long int)tolower(str[j]))<<((j*7)%24)); + } + return nPos; +} + +bool HashFunctionCaseString::cmpIDs( const void *id1, const void *id2 ) +{ + const char *str1 = 
(const char *)id1; + const char *str2 = (const char *)id2; + + int j; + for( j = 0; str1[j] != '\0' && str2[j] != '\0'; j++ ) + { + if( tolower(str1[j]) != tolower(str2[j]) ) + return false; + } + return (str1[j]==str2[j]); +} + diff --git a/src/hashfunctioncasestring.h b/src/hashfunctioncasestring.h new file mode 100644 index 0000000..9ca3d48 --- /dev/null +++ b/src/hashfunctioncasestring.h @@ -0,0 +1,28 @@ +#ifndef HASH_FUNCTION_CASE_STRING +#define HASH_FUNCTION_CASE_STRING + +#include "hashfunction.h" + +/** A hash function for string data. This hash function does strings, but is + * actually generalized to handle any binary stream of characters terminated + * by a null character. This is different than HashFunctionString in that + * this does comparisons without regaurd to case. + *@author Mike Buland. + */ +class HashFunctionCaseString : public HashFunction +{ +public: + /** + * Standard Constructor. + */ + HashFunctionCaseString(); + + /** + * Standard Deconstructor. + */ + ~HashFunctionCaseString(); + unsigned long int hash( const void *id ); + bool cmpIDs( const void *id1, const void *id2 ); +}; + +#endif diff --git a/src/hashfunctionint.cpp b/src/hashfunctionint.cpp new file mode 100644 index 0000000..4bd0feb --- /dev/null +++ b/src/hashfunctionint.cpp @@ -0,0 +1,20 @@ +#include "hashfunctionint.h" + +HashFunctionInt::HashFunctionInt() +{ +} + +HashFunctionInt::~HashFunctionInt() +{ +} + +unsigned long int HashFunctionInt::hash( const void *id ) +{ + return (unsigned long)(id); +} + +bool HashFunctionInt::cmpIDs( const void *id1, const void *id2 ) +{ + return (unsigned long)(id1) == (unsigned long)(id2); +} + diff --git a/src/hashfunctionint.h b/src/hashfunctionint.h new file mode 100644 index 0000000..57bce89 --- /dev/null +++ b/src/hashfunctionint.h @@ -0,0 +1,26 @@ +#ifndef HASH_FUNCTION_INT +#define HASH_FUNCTION_INT + +#include "hashfunction.h" + +/** A hash function for integer data. 
Really, this does almost nothing except + * ensure we're dealing with positive indicies. + *@author Mike Buland. + */ +class HashFunctionInt : public HashFunction +{ +public: + /** + * Standard Constructor. + */ + HashFunctionInt(); + + /** + * Standard Deconstructor. + */ + ~HashFunctionInt(); + unsigned long int hash( const void *id ); + bool cmpIDs( const void *id1, const void *id2 ); +}; + +#endif diff --git a/src/hashfunctionstring.cpp b/src/hashfunctionstring.cpp new file mode 100644 index 0000000..8ea9f57 --- /dev/null +++ b/src/hashfunctionstring.cpp @@ -0,0 +1,36 @@ +#include "hashfunctionstring.h" + +HashFunctionString::HashFunctionString() +{ +} + +HashFunctionString::~HashFunctionString() +{ +} + +unsigned long int HashFunctionString::hash( const void *id ) +{ + const char *str = (const char *)id; + unsigned long int nPos = 0; + for( int j = 0; str[j] != '\0'; j++ ) + { + nPos = str[j] + (nPos << 6) + (nPos << 16) - nPos; +// nPos += nPos<<16|(((unsigned long int)str[j])<<((j*7)%24)); + } + return nPos; +} + +bool HashFunctionString::cmpIDs( const void *id1, const void *id2 ) +{ + const char *str1 = (const char *)id1; + const char *str2 = (const char *)id2; + + int j; + for( j = 0; str1[j] != '\0' && str2[j] != '\0'; j++ ) + { + if( str1[j] != str2[j] ) + return false; + } + return (str1[j]==str2[j]); +} + diff --git a/src/hashfunctionstring.h b/src/hashfunctionstring.h new file mode 100644 index 0000000..566f8ae --- /dev/null +++ b/src/hashfunctionstring.h @@ -0,0 +1,27 @@ +#ifndef HASH_FUNCTION_STRING +#define HASH_FUNCTION_STRING + +#include "hashfunction.h" + +/** A hash function for string data. This hash function does strings, but is + * actually generalized to handle any binary stream of characters terminated + * by a null character. + *@author Mike Buland. + */ +class HashFunctionString : public HashFunction +{ +public: + /** + * Standard Constructor. + */ + HashFunctionString(); + + /** + * Standard Deconstructor. 
+ */ + ~HashFunctionString(); + unsigned long int hash( const void *id ); + bool cmpIDs( const void *id1, const void *id2 ); +}; + +#endif diff --git a/src/hashtable.cpp b/src/hashtable.cpp new file mode 100644 index 0000000..9dfe653 --- /dev/null +++ b/src/hashtable.cpp @@ -0,0 +1,345 @@ +#include +#include +#include + +#include "hashtable.h" + +HashTable::HashTable( HashFunction *hNewFunc, unsigned long int nInitSize, bool bAllowDupes ) +{ + hFunc = hNewFunc; + nTableSize = nextPrime( nInitSize ); + aTable = new HashNode[nTableSize]; + //for( int j = 0; j < nTableSize; j++ ) if( aTable[j].id || aTable[j].data || aTable[j].bDeleted ) printf("Unclean entry\n"); + nSize = 0; + nFilled = 0; + this->bAllowDupes = bAllowDupes; +} + +HashTable::~HashTable() +{ + delete[] aTable; + delete hFunc; +} + +void HashTable::set( int j, const void *newID, const void *newData ) +{ + if( newData == NULL ) + { + printf("Inserting NULL data is indestinguishable from uninserted data!\n"); + } + aTable[j].id = newID; + aTable[j].data = newData; +} + +bool HashTable::isFilled( int j ) +{ + return (aTable[j].id != NULL)||(aTable[j].bDeleted); +} + +bool HashTable::reHash( unsigned long int nNewSize ) +{ + HashNode *aOldTable = aTable; + unsigned long int oldSize = nTableSize; + + // If the table can still be used if we just get rid of deleted items, don't + // change the size of the table, otherwise, go ahead and use the number + // passed in. 
+ if( nSize > nTableSize>>1 ) + { + nTableSize = nextPrime( nNewSize ); + } + + aTable = newTable( nTableSize ); + //for( int j = 0; j < nTableSize; j++ ) if( aTable[j].id || aTable[j].data || aTable[j].bDeleted ) printf("Unclean entry\n"); + + nSize = 0; + nFilled = 0; + + for( unsigned long int j = 0; j < oldSize; j++ ) + { + if( aOldTable[j].id != NULL && aOldTable[j].bDeleted == false ) + { + insert( aOldTable[j].id, aOldTable[j].data ); + } + } + + delete[] aOldTable; +} + +unsigned long int HashTable::probe( unsigned long int nStart, const void *id ) +{ + int nHash = nStart; + nStart = nStart%nTableSize; + if( bAllowDupes == true ) + { + for( + unsigned long int j=0; + isFilled( nStart ) && j < 32; + nStart = (nStart+(1<cmpIDs( aTable[nStart].id, id ) == true && + aTable[nStart].bDeleted == false ) + { + return nStart; + } + } + } + } + // This is our insurance, if the table is full, then go ahead and rehash, + // then try again. + if( isFilled( nStart ) ) + { + reHash( getCapacity()*2 ); + return probe( nHash, id ); + } + return nStart; +} + +HashTable::HashNode *HashTable::newTable( unsigned long int nNewSize ) +{ + return new HashNode[nNewSize]; +} + +#ifdef HASH_DEBUG_VIS +void HashTable::printDebugLine( const char *exData ) +{ + char *buf = new char[getCapacity()+3]; + int j; + buf[0] = '['; + for( j = 0; j < getCapacity(); j++ ) + { + buf[j+1] = (aTable[j].bDeleted)?('X'):((isFilled( j ))?('#'):('-')); + } + buf[j+1] = ']'; + buf[j+2] = '\0'; + printf("%s %s\n", buf, exData ); + delete[] buf; +} +#endif + +bool HashTable::insert( const void *id, const void *data ) +{ + unsigned long int nPos = probe( hFunc->hash( id ), id )%nTableSize; + + if( bAllowDupes == true ) + { + if( aTable[nPos].id == NULL && aTable[nPos].bDeleted == false ) + { + set( nPos, id, data ); +#ifdef HASH_DEBUG_VIS + printDebugLine( (const char *)id ); +#endif + nSize++; + nFilled++; + return true; + } + else + { + return false; + } + } + else + { + if( aTable[nPos].id == NULL && 
aTable[nPos].bDeleted == false ) + { + set( nPos, id, data ); +#ifdef HASH_DEBUG_VIS + printDebugLine( (const char *)id ); +#endif + nSize++; + nFilled++; + return true; + } + else if( hFunc->cmpIDs( aTable[nPos].id, id ) == true ) + { + set( nPos, id, data ); +#ifdef HASH_DEBUG_VIS + printDebugLine( (const char *)id ); +#endif + return true; + } + else + { + return false; + } + } +} + +const void *HashTable::get( const void *id, unsigned long int nSkip ) +{ + unsigned long int nPos = hFunc->hash( id )%nTableSize; + + for( unsigned long int j=0; j < 32; nPos = (nPos+(1<cmpIDs( id, aTable[nPos].id ) && + aTable[nPos].bDeleted == false ) + { + if( nSkip == 0 ) + { + return aTable[nPos].data; + } + else + { + nSkip--; + } + } + } + + return NULL; +} + +void *HashTable::getFirstItemPos() +{ + HashPos *pos = new HashPos; + return pos; +} + +const void *HashTable::getItemData( void *xPos ) +{ + return aTable[((HashPos *)xPos)->nPos].data; +} + +const void *HashTable::getItemID( void *xPos ) +{ + return aTable[((HashPos *)xPos)->nPos].id; +} + +void *HashTable::getNextItemPos( void *xPos ) +{ + HashPos *pos = (HashPos *)xPos; + if( pos->bStarted == false ) + { + pos->bStarted = true; + pos->nPos = 0; + } + else + { + pos->nPos++; + } + if( pos->nPos < nTableSize ) + { + for( ; pos->nPos < nTableSize; pos->nPos++ ) + { + if( isFilled( pos->nPos ) && + aTable[pos->nPos].bDeleted == false ) + { + return xPos; + } + } + } + + delete pos; + + return NULL; +} + +// Big-O sqrt(n) +// Change this to be erethpothynies table with a storage +// lookup later on. 
+bool HashTable::isPrime (int num) +{ + if (num == 2) // the only even prime + return true; + else if (num % 2 == 0) // other even numbers are composite + return false; + else + { + //bool prime = true; + int divisor = 3; + int upperLimit = static_cast(sqrt(num) + 1); + while (divisor <= upperLimit) + { + if (num % divisor == 0) + return false; + // prime = false; + divisor +=2; + } + return true; + } +} + +// Big-O n^(3/2) +int HashTable::nextPrime( int base ) +{ + int nPrime; + for( nPrime = base; isPrime( nPrime ) == false; nPrime++ ); + return nPrime; +} + +unsigned long int HashTable::getCapacity() +{ + return nTableSize; +} + +unsigned long int HashTable::getSize() +{ + return nSize; +} + +double HashTable::getLoad() +{ + return (double)(nFilled)/(double)(nTableSize); +} + +const void *HashTable::operator[](const void *id) +{ + return get( id ); +} + +bool HashTable::del( const void *id, int nSkip ) +{ + unsigned long int nPos = hFunc->hash( id )%nTableSize; + + for( unsigned long int j=0; j < 32; nPos = (nPos+(1<cmpIDs( id, aTable[nPos].id ) && + aTable[nPos].bDeleted == false ) + { + if( nSkip == 0 ) + { + aTable[nPos].bDeleted = true; +// aTable[nPos]. + nSize--; +#ifdef HASH_DEBUG_VIS + printDebugLine( (const char *)id ); +#endif + return true; + } + else + { + nSkip--; + } + } + } + + return false; +} + diff --git a/src/hashtable.h b/src/hashtable.h new file mode 100644 index 0000000..d14be71 --- /dev/null +++ b/src/hashtable.h @@ -0,0 +1,299 @@ +/**\hashtable.h + * Describes the HashFunction, HashFunctionString, and HashTable classes. It + * was just easier to put them all in one set of files. + *@author Mike Buland + */ + +#ifndef HASH_TABLE_H +#define HASH_TABLE_H + +//Uncomment this line to see a cool text-mode visualization of what's going on +//#define HASH_DEBUG_VIS 1 + +#include +#include +#include + +#include "hashfunction.h" + +/** + * A simple yet flexable hash-table. 
This uses several tricks to help ensure + * that the table is always running at maximum efficiency. You no longer have + * to specify a "danger fill level"; when more space is needed a rehash is + * automatically triggered. Deleting elements is fully supported, as well as + * duplicate elements. To work with and allow duplicates simply construct your + * HashTable the way you normally would, but when deleting or getting elements + * you can specify a skip value. This effectively allows you to treat elements + * with duplicate IDs as though they were in a zero-based array. The first + * element inserted with a given ID would be at skip zero, the next at skip 1 + * and so on. This allows you to quickly search for elements with duplicate + * names; just stop when you get a null for a skip number, i.e. + *
+ *   for( int j = 0;; j++ )
+ *   {
+ *       const void *pData = hash.get( myID, j );
+ *       if( !pData ) break;
+ *       // Do something interesting with pData
+ *   }
+ * 
+ * There are new features in this HashTable that also allow for memory saving + * when dealing with systems where many elements are being deleted from the + * table. In those cases the elements deleted cannot be simply deleted, instead + * they have to be marked as deleted and hidden from the user, but maintained in + * the table so that future hashing operations don't fail. When rehashing + * occurs all elements marked as deleted are quietly removed. In these cases, + * if the number of deleted items would free enough space in the table for the + * table to be used efficiently without resizing, it is left the same size and + * rehashing is performed effectively in place, allowing the deleted items to + * be removed. + *
+ * For info on adding new hashing algorithms, please see the HashFunction class. + *@author Mike Buland + *@todo Fix probing for tables that allow duplicates, and delete an item, then + * insert an item with the same name. + */ +class HashTable +{ +public: + /** Constructs a hash table. + *@param hNewFunc A pointer to a hashfunction class to use. If this is + * null the default general string type will be used. + *@param nInitSize The initial size of the hashtable. + *@param bAllowDupes Setting this value to true allows the system to + * insert more than one copy of any given key. This can be tricky, and + * will require you to use the nSkip parameter on the get function. + */ + HashTable( HashFunction *hNewFunc, unsigned long int nInitSize, bool bAllowDupes=false ); + + /** + * Destroys the hashtable, cleaning up all internal storage, but not stored + * elements. Also deletes the HashFunction passed in in the constructor. + */ + ~HashTable(); + + /** Inserts an item into the hashtable. This function will trigger a + * rehash if adding another item would force the table's load factor over + * the danger level. + *@param id used to find the data later. + *@param data The data item to insert into the table with the identifier + * id + *@returns True if insertion was successfull, and false if it failed. + */ + bool insert( const void *id, const void *data ); + + /** Gets an item in the hashtable based on the id of that item. If there + * is more than one item with the same id you can use the nSkip parameter + * to access all of them. + *@param id The id of the item you're trying to find. + *@param nSkip The number of items with that id to skip before returning + * with the requested item. + *@returns A pointer to the data stored at the given id. + */ + const void *get( const void *id, unsigned long int nSkip=0 ); + + /** Gets the total capacity of the hashtable. This is actually the number + * of total positions available inside the hashtable at the moment. 
This + * will change when the hashtable's load exceeds the danger level. + * Please note that this is NOT the actual amount of space available. + * In reality you can only access about 45-50 percent of that space. + *@returns The total capacity. + */ + unsigned long int getCapacity(); + + /** Gets the number of filled in items in the hash table. This is roughly + * equivelent to the getSize function assosiated with the Lists. + *@returns The number of filled in items in the hash table. + */ + unsigned long int getSize(); + + /** Gets the load (percentage) of filled in items in the table. This is + * technically the size divided by the capacity, but is definately usefull + * since it's required to check if it's time to rehash. + *@returns The table load in the range 0.0 to 1.0 + */ + double getLoad(); + + /** Sets up an xPos object for use indexing the items in the table. Call + * this first and follow the directions for getNextItemPos below to + * iterate through every item in the table, while avoiding the empty + * spaces. + *@returns A pointer to a xPos object telling the hashtable where to find + * the item you're looking at. + */ + void *getFirstItemPos(); + + /** Get the item's data that is being pointed to by xPos. This is only + * valid after xPos was created using getFirstItemPos and getNextItemPos + * was called at least once. + *@param xPos supplied by getFirstItemPos. + *@returns The key value that was used to insert the data into the table. + */ + const void *getItemData( void *xPos ); + + /** Get the item's ID that is being pointed to by xPos. This is only + * valid after xPos was created using getFirstItemPos and getNextItemPos + * was called at least once. + *@param xPos supplied by getFirstItemPos. + *@returns The key value that was used to insert the data into the table. + */ + const void *getItemID( void *xPos ); + + /** Used for iterating through a hash table sequentially. 
This will + * update the xPos pointer to point to the next time, all ready to + * be accessed with getItemID and getItemData. This must be called at + * least once before xPos is meaningful, and will return a NULL when it + * has reached the last item. + *@param xPos This must be an object created by a call to the function + * getFirstItemPos, and is only meaningful to the internal routines. + * Aborting a call in the middle (not running to the end of the table) + * may result in a memory leak at the moment. + *@returns xPos if still iterating through the list, otherwise it will + * return NULL when the end has been reached and the xPos variable has + * been deleted. + */ + void *getNextItemPos( void *xPos ); + + /** A helpful operator to make accessing items easier. Please note that + * this simply returns a pointer to the data stored internally, and cannot + * be used like the STL operator to store new data, use insert for that. + *@param id The identifier used to store the requested item. + *@returns The data value assosiated with the given id, or NULL if it + * wasn't found in the table. + */ + const void *operator[](const void *id); + + /** + * Delete the specified item from the hashtable. This actually keeps the + * data and marks it deleted. For all intents and purposes to the user it + * is deleted, except that the space is still used until a rehash is forced. + * This means that in hashtables where elements are being inserted and + * deleted frequently you may run into a higher rate of expansion. + *@param id The ID to delete. + *@param nSkip The number of similar id's to skip before deleting in a + * hashtable that allows duplicates. + *@returns True if the element was found and deleted, false otherwise. + */ + bool del( const void *id, int nSkip=0 ); + +private: + /** + * Contains info related to a position in the hashtable. Used for + * searching through hashtables one item at a time, in order. 
This class + * should never be created by anything but a HashTable, and should never + * be referenced directly. Instead the hashtable returns a void pointer, + * which is what should be passed back in next time you use a search + * function. Always finish a search, since the object is deleted at the + * end of the search. + *@author Mike Buland + */ + class HashPos + { + public: + /** Create a blank HashPos. */ + HashPos() { bStarted=false; nPos = 0; }; + /** Has the search been started? */ + bool bStarted; + /** The position (index) into the backend storage structure. */ + unsigned long int nPos; + }; + + /** + * All data related to a single element in the hashtable. This should + * really only be used and manipulated by the HashTable itself. + *@author Mike Buland + */ + typedef struct HashNode + { + public: + /** Create a new, empty HashNode. */ + HashNode() { id = NULL; data = NULL; bDeleted = false; }; + /** A pointer to the original ID that was used to key the data. */ + const void *id; + /** A pointer to the data stored along with the above ID. */ + const void *data; + /** Weather or not this data should really...exist */ + bool bDeleted; + } HashNode; + +private: + /** + * Just sets the values in the element to some friendly values. + *@param newID The new ID to store. + *@param newData The new Data to store. + */ + void set( int j, const void *newID, const void *newData ); + /** + * Tells you if the node is filled or not. + *@returns True=an ID has been stored here, False=no ID. + */ + bool isFilled( int j ); + /** + * This actually resizes, but since every resize requires a reHash to go + * along with it, that's the name. This actually creates a new buffer for + * all of the contained data and then pulls every old element that was in + * the old table out and performs the hashing placement calculations again. + * This function skips all data that was marked as deleted, so at this + * point it really will be. 
+ *@param nNewSize The new size to set the table to while re-hashing. + *@returns True if the operation was successful, false otherwise. + */ + bool reHash( unsigned long int nNewSize ); + + /** + * Helper function to allocate a new table. Really just does the memory + * allocation. + *@param nNewSize The size of the table to generate. + *@returns A new, blank array of HashNode objects the size you specified. + */ + HashNode *newTable( unsigned long int nNewSize ); + + /** + * This function is used once an actual hash code is obtained. nStart is + * the given hash code, which is then wrapped to the size of the table. If + * there is data at that location, tests are performed to see if it's the + * right one. If it is, then it is returned, otherwise a series of further + * tests based on a 2^n search pattern is performed. The position of the + * requested data in the back-end storage is returned if found, otherwise + * another less useful value is returned... + *@param nStart The initial hashcode of the ID testing for. + *@param id A pointer to the id that is being searched for. + *@returns The real location of the data requested. + */ + unsigned long int probe( unsigned long int nStart, const void *id ); + + /** + * Simple helper function to determine if a number is prime or not. + * This function runs in sqrt(n) time. + *@param num Number to test for prime-hood. + *@returns True if the number is prime, false otherwise. + */ + bool isPrime( int num ); + + /** + * Given any number, this function finds the first number after it that is + * prime. Since this number is a multiple internally it's rare that the + * starting number would be prime. + *@param base The number to start the prime search on. + *@returns The first prime after the number given. + */ + int nextPrime( int base ); + +#ifdef HASH_DEBUG_VIS + void printDebugLine( const char *exData ); +#endif + + /** A pointer to the HashFunction subclass instance to use. 
*/ + HashFunction *hFunc; + /** The complete array of HashNode objects to store data in. */ + HashNode *aTable; + /** The actual size of the table, not how many elements are in it. */ + unsigned long int nTableSize; + /** The number of elements that are in the table. */ + unsigned long int nSize; + /** The number of elements that are unavailable now. */ + unsigned long int nFilled; + /** Allow duplicate ID's in the table. */ + bool bAllowDupes; +}; + +#endif diff --git a/src/http.cpp b/src/http.cpp new file mode 100644 index 0000000..11950b7 --- /dev/null +++ b/src/http.cpp @@ -0,0 +1,371 @@ +#include +#include +#include "http.h" +#include "hashfunctionstring.h" + +Http::Http( Connection *pConnection ) : hReqHeader( new HashFunctionString(), 100 ) +{ + pCon = pConnection; + nParseState = parseInit; +} + +Http::~Http() +{ + for( int j = 0; j < lStrings.getSize(); j++ ) + { + delete (std::string *)lStrings[j]; + } +} + +bool Http::parseRequest() +{ + for(;;) + { + pCon->readInput(); + switch( nParseState ) + { + case parseInit: + { + int nLen = pCon->scanInputFor( CR ); + if( nLen == -1 ) + { + return false; + } + else + { + nReqType = getRequestType( pCon->getInput() ); + pCon->usedInput( pCon->scanInputFor(' ')+1 ); + + nLen = pCon->scanInputFor(' '); + sReqURI.append( pCon->getInput(), nLen ); + pCon->usedInput( nLen+1 ); + + if( !strncmp( pCon->getInput(), "HTTP/", 5 ) ) + { + char mbuf[2]={'\0','\0'}; + unsigned char major, minor; + + pCon->usedInput( 5 ); + mbuf[0] = pCon->getInput()[0]; + major = (unsigned char)atoi(mbuf); + mbuf[0] = pCon->getInput()[2]; + minor = (unsigned char)atoi(mbuf); + setRequestVersion( major, minor ); + if( checkRequestVer() ) + { + nParseState = parseHeader; + } + else + { + setResponseStatus( statusHTTPVersionNotSupported ); + printf("Verson not supported.\n"); + return true; + } + + pCon->usedInput( 5 ); + } + else + { + setResponseStatus( statusBadRequest ); + } + + //return false; + } + } + break; + + case parseHeader: + { + int 
nLen = pCon->scanInputFor( CR ); + if( nLen == -1 ) + { + return false; + } + else if( nLen == 0 ) + { + // We've got our double-newline, time for content. + pCon->usedInput( 2 ); + setResponseStatus( statusOK ); + return true; + } + else + { + nLen = pCon->scanInputFor(':'); + if( nLen == -1 ) + { + printf("No colon? what are you trying to pull?\n"); + } + else + { + std::string *pName = new std::string( pCon->getInput(), nLen ); + lStrings.append( pName ); + pCon->usedInput( nLen+1 ); + + nLen = pCon->scanInputFor( CR ); + std::string *pValue = convSpaceString( pCon->getInput(), nLen ); + lStrings.append( pValue ); + pCon->usedInput( nLen+2 ); + + hReqHeader.insert( + pName->c_str(), + pValue->c_str() + ); + + printf("::%s = \"%s\"\n", + pName->c_str(), + pValue->c_str() + ); + } + } + } + break; + + case parseFinished: + break; + } + } +} + +bool Http::buildResponse( short nResponseCode, const char *sResponse ) +{ + if( nResponseCode > 0 ) + { + nResStatus = nResponseCode; + } + + if( sResponse == NULL ) + { + sResStatusStr = "uh yeah"; + } + else + { + sResStatusStr = sResponse; + } + + time_t curTime; + time( &curTime ); + gmtime_r( &curTime, &tResTime ); + + sServerStr = "YFHttp/0.0.1"; + bResPersistant = false; + + //char buf[30]; + //strftime( buf, 30, "%a, %d %b %Y %H:%M:%S GMT", &tResponseTime ); + + return true; +} + +bool Http::sendResponse() +{ + char buf[256]; + + sprintf( buf, "HTTP/1.1 %d %s\r\n", nResStatus, sResStatusStr.c_str() ); + pCon->appendOutput( buf ); + + strftime( buf, 256, "Date: %a, %d %b %Y %H:%M:%S GMT\r\n", &tResTime ); + pCon->appendOutput( buf ); + + sprintf( buf, "Server: %s\r\n", sServerStr.c_str() ); + pCon->appendOutput( buf ); + + if( bResPersistant ) + { + } + else + { + pCon->appendOutput("Connection: close\r\n"); + } + + sprintf( buf, "Content-Type: %s\r\n", sResMime.c_str() ); + pCon->appendOutput( buf ); + + sprintf( buf, "Content-Length: %d\r\n", sResContent.size() ); + pCon->appendOutput( buf ); + + 
pCon->appendOutput("\r\n"); + + pCon->appendOutput( sResContent.c_str(), sResContent.size() ); + + return true; +} + +void Http::setResponsePersistant( bool bPersistant ) +{ + bResPersistant = bPersistant; +} + +void Http::setResponseContent( const char *sMime, const char *sContent, int nLen ) +{ + sResMime = sMime; + sResContent.erase(); + sResContent.append( sContent, nLen ); +} + +std::string *Http::convSpaceString( const char *sStr, int nLen ) +{ + int nNewLen = 0; + bool bStart = true; + bool bSpace = false; + + for( int j = 0; j < nLen; j++ ) + { + if( sStr[j] == ' ' || sStr[j] == '\t' ) + { + if( bStart ) + { + } + else if( bSpace == false ) + { + bSpace = true; + nNewLen++; + } + } + else + { + bStart = false; + bSpace = false; + nNewLen++; + } + } + if( bSpace ) + { + nNewLen--; + } + + std::string *pSStr = new std::string; + //char *pStr = pSStr->c_str(); + nNewLen = 0; + bStart = true; + bSpace = false; + + for( int j = 0; j < nLen; j++ ) + { + if( sStr[j] == ' ' || sStr[j] == '\t' ) + { + if( bStart ) + { + } + else if( bSpace == false ) + { + bSpace = true; + *pSStr += ' '; + //pStr[nNewLen++] = ' '; + } + } + else + { + bStart = false; + bSpace = false; + *pSStr += sStr[j]; + //pStr[nNewLen++] = sStr[j]; + } + } + if( bSpace == true ) + { + nNewLen--; +// pStr[nNewLen] = '\0'; + } + + return pSStr; +} + +const char *Http::getRequestURI() +{ + return sReqURI.c_str(); +} + +short Http::getRequestType( const char *sType ) +{ + if( !strncmp( sType, "OPTIONS", 7 ) ) + { + return reqOptions; + } + else if( !strncmp( sType, "GET", 3 ) ) + { + return reqGet; + } + else if( !strncmp( sType, "HEAD", 4 ) ) + { + return reqHead; + } + else if( !strncmp( sType, "POST", 4 ) ) + { + return reqPost; + } + else if( !strncmp( sType, "PUT", 3 ) ) + { + return reqPut; + } + else if( !strncmp( sType, "DELETE", 6 ) ) + { + return reqDelete; + } + else if( !strncmp( sType, "TRACE", 5 ) ) + { + return reqTrace; + } + else if( !strncmp( sType, "CONNECT", 7 ) ) + { + return 
reqConnect; + } + else + { + printf(" Uh oh, extension!\n"); + return reqExtension; + } +} + +const char *Http::getRequestType( short nType ) +{ + switch( nType ) + { + case reqOptions: return "OPTIONS"; + case reqGet: return "GET"; + case reqHead: return "HEAD"; + case reqPost: return "POST"; + case reqPut: return "PUT"; + case reqDelete: return "DELETE"; + case reqTrace: return "TRACE"; + case reqConnect: return "CONNECT"; + case reqExtension: return "EXTENSION"; + default: return "INVALID VALUE"; + } +} + +short Http::getRequestType() +{ + return nReqType; +} + +const char *Http::getRequestTypeStr() +{ + return getRequestType( nReqType ); +} + +void Http::setResponseStatus( short nStatus ) +{ + nResStatus = nStatus; +} + +void Http::setRequestVersion( unsigned char nMajor, unsigned char nMinor ) +{ + cReqVersion = (nMajor<<4)|nMinor; +} + +unsigned char Http::getRequestMinorVer() +{ + return cReqVersion&0x0F; +} + +unsigned char Http::getRequestMajorVer() +{ + return cReqVersion>>4; +} + +bool Http::checkRequestVer() +{ + if( cReqVersion == HTTP11 ) + return true; + return false; +} + diff --git a/src/http.h b/src/http.h new file mode 100644 index 0000000..4ee4470 --- /dev/null +++ b/src/http.h @@ -0,0 +1,271 @@ +/**\file http.h + * Describe a Hyper Text Transfer Protocol processor. This class will allow + * any program to act as either an HTTP server, client, or both. It contains + * a number of additional helpers and subclasses. + *@author Mike Buland + */ + +#ifndef HTTP_H +#define HTTP_H + +#include +#include "connection.h" +#include "linkedlist.h" +#include "hashtable.h" + +#define CR '\r' /**< The ASCII value of a Carrage Return */ +#define LF '\n' /**< The ASCII value of a Line Feed */ +#define CRLF CR LF /**< Combo of CR+LF for use in http */ + +/** + * Macro to create combined http version codes. This just makes processing a + * little bit faster for the most part. 
/**
 * This is the master HTTP processing class.  One instance handles one
 * transaction; in the future a different mechanism may be thought up, but for
 * now this means that you must create multiple objects to handle a single
 * connection that contains multiple requests.
 * In the constructor the Http class is given a connection object.  This object
 * should already be initialized and connected to whatever socket it wants to
 * be sending and receiving data to and from.  Once that's done you can call
 * parseRequest if you're acting as a server, or a variety of buildRequest
 * functions to create and send a request if you're a client.
 * Please note that this class does not provide any HTTP or extended format
 * processing systems, but will allow for mime types tables to be registered.
 *@author Mike Buland
 */
class Http
{
public:
	/**
	 * Create an Http object tied to an existing connection object.  The
	 * pointer is only stored; the destructor does not delete it.
	 *@param pConnection The live connection object to deal with.
	 */
	Http( Connection *pConnection );

	/**
	 * Standard destructor.  Deletes the header name/value strings that were
	 * collected while parsing.
	 */
	~Http();

	/**
	 * Perform all parsing needed to figure out what an HTTP client wants from
	 * us.  This will set up a number of properties in the Http object itself
	 * and has the possibility of setting one or more response states
	 * initially.  These states should be checked for immediately after parsing
	 * to see if an appropriate error message should be generated.  These
	 * errors can include issues with protocol, data formats, or unknown
	 * versions of the protocol.
	 *@returns True means that all processing is finished, false means that
	 * the parseRequest function should be called again when more data is
	 * ready.  A return value of true does not indicate success, only that
	 * processing is finished; the response status set by the parse routine
	 * should be checked to see what happened.  A 200 indicates that as far as
	 * the parser is concerned, everything went smoothly.  Otherwise it's your
	 * responsibility to build the appropriate error response body (like an
	 * html file) and send it as the response.
	 * NOTE(review): earlier documentation referred to a getResponseStatus
	 * function, but no such accessor is declared in this class.
	 */
	bool parseRequest();

	/**
	 * Get a request type's internal Http object id based on the string
	 * representation.  These can be any HTTP/1.1 standard request type.
	 *@param sType The string that should be checked for type.  This is in all
	 * caps, just like if it came from the HTTP client, which is most often
	 * the case.  Only the leading characters are compared, so the string does
	 * not have to end right after the method name.
	 *@returns The numerical ID of the given request type.  Please note that
	 * the HTTP/1.1 standard specifies that any string is valid here as long
	 * as the non-basic string is a request type understood by the serving
	 * software.  This means that anything that is non-standard will return
	 * a type reqExtension and not an error.  This is not a mistake.
	 */
	short getRequestType( const char *sType );

	/**
	 * Get the string representation of an Http object request type integer
	 * ID.  This is used mainly for debugging to be sure the system has what
	 * we think it has.
	 *@param nType The integer ID of the request type to process.
	 *@returns The HTTP/1.1 string representation of that Http object ID code,
	 * or "INVALID VALUE" if the ID is not a known request type.
	 */
	const char *getRequestType( short nType );

	/**
	 * Returns the Http object request type ID code that is stored in the
	 * object by either the parseRequest function or use of the buildRequest
	 * functions.
	 *@returns The ID of the request type stored in the object.
	 */
	short getRequestType();

	/**
	 * Same as getRequestType, only returns the string representation.
	 *@returns The string representation of the request type ID stored in the
	 * object.
	 */
	const char *getRequestTypeStr();

	/**
	 * Sets the version of the request used by the system.  This will be used
	 * by parse request, but is also part of the buildRequest tool functions.
	 * The two numbers are packed into a single byte, major in the high four
	 * bits and minor in the low four bits.
	 *@param nMajor The major version number.
	 *@param nMinor The minor version number.
	 */
	void setRequestVersion( unsigned char nMajor, unsigned char nMinor );

	/**
	 * Gets the minor version number of the protocol used/to be used in this
	 * request.
	 *@returns The minor version number of the request protocol.
	 */
	unsigned char getRequestMinorVer();

	/**
	 * Gets the major version number of the protocol used/to be used in this
	 * request.
	 *@returns The major version number of the request protocol.
	 */
	unsigned char getRequestMajorVer();

	/**
	 * Checks the stored request version against the supported protocol
	 * versions.  The implementation currently accepts HTTP/1.1 only.
	 *@returns True if the protocol version is supported, false otherwise.
	 */
	bool checkRequestVer();

	/**
	 * Converts an arbitrary string to a new string object with space saving
	 * operations performed ala the HTTP/1.1 specs.  The intent is that all
	 * leading and trailing whitespace is stripped, and all whitespace within
	 * the string is reduced to a single space char.
	 *@param sStr A pointer to the string data to process.
	 *@param nLen The length of the string to process.  Since this function is
	 * often called on stream data, there is no null terminator where we need
	 * one.  This is here for convenience so the data doesn't need to be
	 * hacked up or moved to an intermediate buffer.
	 *@returns A string allocated with new that may well be shorter than the
	 * original but that will have the same value as far as the HTTP/1.1 specs
	 * are concerned.  The caller is responsible for deleting it.
	 */
	std::string *convSpaceString( const char *sStr, int nLen );

	/**
	 * Gets a string pointer to the URI that was/is being requested.  This can
	 * be any RFC standard URI, with or without protocol and domain.
	 *@returns A pointer to the URI that was/is being requested.  The pointer
	 * refers to storage inside this object.
	 */
	const char *getRequestURI();

	/**
	 * Set a new response status.  This status can be anything that the HTTP
	 * specs allow.  Other values are allowed as well, but beware, not all
	 * servers/clients will accept values that are not in the tables in this
	 * class.
	 *@param nStatus The status to set.
	 */
	void setResponseStatus( short nStatus );

	/**
	 * Fill in the response header fields: status text, the current time (in
	 * GMT) and the server identification string.  Also resets the response
	 * to non-persistent.
	 *@param nResponseCode The status code to use.  Values of zero or less
	 * leave the previously set status untouched.
	 *@param sResponse The status text for the status line.  When NULL a
	 * placeholder text is used.
	 *@returns Always true in the current implementation.
	 */
	bool buildResponse( short nResponseCode=-1, const char *sResponse=NULL );

	/**
	 * Set the body of the response and its mime type.  The content is copied
	 * into the object.
	 *@param sMime The value to send in the Content-Type header.
	 *@param sContent Pointer to the body data.
	 *@param nLen The number of bytes to copy from sContent.
	 */
	void setResponseContent( const char *sMime, const char *sContent, int nLen );

	/**
	 * Choose whether the response is marked persistent.  A non-persistent
	 * response is sent with a "Connection: close" header.
	 *@param bPersistant True for a persistent connection, false otherwise.
	 */
	void setResponsePersistant( bool bPersistant );

	/**
	 * Append the complete response (status line, headers, blank line and
	 * body) to the connection's output.
	 *@returns Always true in the current implementation.
	 */
	bool sendResponse();

	/** Internal IDs for the HTTP request methods. */
	enum
	{
		reqOptions,
		reqGet,
		reqHead,
		reqPost,
		reqPut,
		reqDelete,
		reqTrace,
		reqConnect,
		reqExtension
	};

	/** HTTP response status codes, grouped by class (1xx through 5xx). */
	enum
	{
		statusContinue = 100,
		statusSwitchProto = 101,

		statusOK = 200,
		statusCreated = 201,
		statusAccepted = 202,
		statusNonAuthInfo = 203,
		statusNoContent = 204,
		statusResetContent = 205,
		statusPartialContent = 206,

		statusMultiChoices = 300,
		statusMovedPermanently = 301,
		statusFound = 302,
		statusSeeOther = 303,
		statusNotModified = 304,
		statusUseProxy = 305,
		statusUnused = 306,
		statusTempRedirect = 307,

		statusBadRequest = 400,
		statusUnauthorized = 401,
		statusPaymentRequired = 402,
		statusForbidden = 403,
		statusNotFound = 404,
		statusMethodNotAllowed = 405,
		statusNotAcceptable = 406,
		statusProxyAuthRequired = 407,
		statusRequestTimeout = 408,
		statusConflict = 409,
		statusGone = 410,
		statusLengthRequired = 411,
		statusPreconditionFailed = 412,
		statusRequestEntityTooLarge = 413,
		statusRequestURITooLong = 414,
		statusUnsupportedMediaType = 415,
		statusRequestedRangeNotSatisfiable = 416,
		statusExpectationFailed = 417,

		statusInternalServerError = 500,
		statusNotImplemented = 501,
		statusBadGateway = 502,
		statusServiceUnavailable = 503,
		statusGatewayTimeout = 504,
		statusHTTPVersionNotSupported = 505
	};

private:
	Connection *pCon;			/**< The connection to read from and write to. */
	unsigned char nParseState;	/**< Current parser state, one of the parse* values. */

	short nReqType;				/**< Request type ID, one of the req* values. */
	std::string *pReqStr;		/**< NOTE(review): not referenced by the implementation in http.cpp; confirm whether it is still needed. */
	std::string sReqURI;		/**< The URI taken from the request line. */
	unsigned char cReqVersion;	/**< Packed request version, see setRequestVersion. */
	HashTable hReqHeader;		/**< Request headers, keyed by header name. */
	LinkedList lStrings;		/**< Owns the std::string objects backing hReqHeader. */

	std::string sServerStr;		/**< Value of the Server response header. */
	std::string sResMime;		/**< Value of the Content-Type response header. */
	std::string sResContent;	/**< The response body. */
	std::string sResStatusStr;	/**< Text that follows the code on the status line. */
	bool bResPersistant;		/**< False to send "Connection: close". */
	struct tm tResTime;			/**< Time used for the Date response header. */
	short nResStatus;			/**< The response status code. */

	/** States of the request parser. */
	enum
	{
		parseInit,
		parseHeader,
		parseFinished
	};
};
* + * * + ***************************************************************************/ + +#include "linkedlist.h" + +LinkedList::LinkedList( ) +{ + pBase = NULL; + pTop = NULL; + pLast = NULL; + nSize = 0; + nLast = -1; +} + +LinkedList::~LinkedList( ) +{ +/* + Link *pCur = pBase; + while( pCur ) + { + Link *pLast = pCur; + pCur = pCur->pNext; + delete pLast; + } +*/ + empty(); +} + +void *LinkedList::getAt( int index ) +{ + if( index < 0 || index >= nSize ) + return NULL; + + return getPtrTo( index )->pData; +} + +void LinkedList::append( void *data ) +{ + if( pBase == NULL ) + { + pBase = new Link( data ); + pTop = pBase; + nSize++; + } + else + { + pTop->pNext = new Link( data ); + pTop = pTop->pNext; + nSize++; + } +} + +void LinkedList::insertBefore( void *data, int pos ) +{ + if( pos < 0 || pos > nSize ) + return; + + if( pos == 0 ) + { + Link *pTmp = new Link( data, pBase ); + if( pBase == NULL ) + { + pTop = pTmp; + } + pBase = pTmp; + if( nLast >= 0 ) nLast++; + nSize++; + } + else + { + Link *pCur; + if( (pCur = getPtrTo( pos-1 )) == NULL ) + { + return; + } + Link *pNew = new Link( data, pCur->pNext ); + pCur->pNext = pNew; + if( pNew->pNext == NULL ) + { + pTop = pNew; + } + if( nLast >= pos ) nLast++; + nSize++; + } +} + +int LinkedList::getSize( ) +{ + return nSize; +} + +bool LinkedList::isEmpty( ) +{ + if( nSize == 0 ) + return true; + return false; +} + +void LinkedList::deleteAt( int index ) +{ + if( index >= nSize || + pBase == NULL ) + return; + + if( index == 0 ) + { + Link *pTmp = pBase->pNext; + delete pBase; + pBase = pTmp; + if( nLast >= 0 ) nLast--; + nSize--; + if( pBase == NULL ) + { + pTop = NULL; + } + else if( pBase->pNext == NULL ) + { + pTop = pBase; + } + } + else + { + Link *pCur = getPtrTo( index-1 ); + if( pCur->pNext == pTop ) + { + pTop = pCur; + } + Link *pTmp; + if( pCur->pNext == NULL ) + { + pTmp = NULL; + } + else + { + pTmp = pCur->pNext->pNext; + } + delete pCur->pNext; + pCur->pNext = pTmp; + if( nLast == index ) nLast 
= -1; + else if( index < nLast ) nLast--; + nSize--; + } +} + +void LinkedList::empty() +{ + while( nSize > 0 ) + { + deleteAt( 0 ); + } +} + +void LinkedList::setSize( int newSize ) +{ + if( newSize < nSize ) + { + // Delete items off of the end of the list. + while( nSize > newSize ) + { + deleteAt( nSize-1 ); + } + } + else + { + // Add null items to the end of the list. + while( nSize < newSize ) + { + append( NULL ); + } + } +} + +void LinkedList::setAt( int index, void *data ) +{ + if( index >= nSize || index < 0 ) + return; + + getPtrTo( index )->pData = data; +} + +LinkedList::Link *LinkedList::getPtrTo( int index ) +{ + if( index < 0 || index >= nSize ) + return NULL; + if( index == nLast ) + { + return pLast; + } + if( index == 0 ) + { + pLast = pBase; + nLast = 0; + return pBase; + } + else + { + Link *pCur = pBase; + int nCur = 0; + if( nLast < index && nLast >= 0 ) + { + pCur = pLast; + nCur = nLast; + } + while( nCur != index ) + { + pCur = pCur->pNext; + nCur++; + } + nLast = index; + pLast = pCur; + return pCur; + } +} diff --git a/src/linkedlist.h b/src/linkedlist.h new file mode 100644 index 0000000..c45cc9b --- /dev/null +++ b/src/linkedlist.h @@ -0,0 +1,87 @@ +/**@file + * Describes the LinkedList implementation of the List ADT. + *@author Mike Buland + */ + +#ifndef LINKEDLIST_H +#define LINKEDLIST_H + +#include +#include "list.h" + +/** A linked-item implementation of the List ADT. Since the data is linked + * sequentially this is a great choice for lists that will grow and shrink + * a lot, but don't require as much random access. This implementation + * includes optomizations that make iterating through data, and appending + * items to the list take O(1) time. + *@author Mike Buland + */ +class LinkedList : public List +{ +public: + /** + * Construct a blank LinkedList. + */ + LinkedList(); + + /** + * Delete all list data, but do not delete any of the contained elements. 
+ */ + ~LinkedList(); + + void *getAt( int nIndex ); + void append( void *pData ); + void insertBefore( void *pData, int nPos = 0 ); + int getSize( ); + bool isEmpty( ); + void deleteAt( int nIndex ); + void empty(); + void setSize( int nNewSize ); + void setAt( int nIndex, void *pData ); + +private: + /** + * A link in the linked list. + */ + class Link + { + public: + /** + * Construct an empty link. + */ + Link() + { + pData = NULL; + pNext = NULL; + } + /** + * Construct a link filled in with useful data. + *@param newData The data this link should hold. + *@param newNext The next link that this link should point to. + */ + Link( void *newData = NULL, Link * newNext = NULL ) + { + pData = newData; + pNext = newNext; + } + void *pData; /**< A pointer to the contained data. */ + Link *pNext; /**< A pointer to the next link in the chain */ + }; + + /** + * Finds a pointer to the link at index index. This is the core function + * called for all seek operations, and has been optimized as heavily as + * possible. + *@param index The zero-based index of the desired element. + *@returns A pointer to the requested Link, or NULL if it isn't found. + */ + Link *getPtrTo( int index ); + Link *pBase; /**< The first link in the list. */ + Link *pTop; /**< The Last link in the list. */ + Link *pLast; /**< The previously requested link. */ + int nSize; /**< The number of contained links. */ + int nLast; /**< The index of the previously requested link. 
/**
 * A message to be broadcast across ProgramLinks in a ProgramChain.  Generally
 * one would make a subclass of this in order to transmit more useful
 * information, but sometimes it isn't necessary.
 *@author Mike Buland
 */
class LinkMessage
{
public:
	/**
	 * Construct a blank LinkMessage.  The message value starts out as zero
	 * so that reading nMsg from a blank message is well defined.
	 */
	LinkMessage() : nMsg( 0 ) {}

	/**
	 * Destroy a LinkMessage.  Virtual so that subclasses are cleaned up
	 * correctly when deleted through a LinkMessage pointer.
	 */
	virtual ~LinkMessage();

	/**
	 * Create a LinkMessage object with a specific message associated with it
	 * to start with.
	 *@param nNewMsg The message to use in the Message object.
	 */
	LinkMessage( int nNewMsg );

	/**
	 * The message contained in the Message object.
	 */
	int nMsg;
};
+ *@author Mike Buland + */ + virtual void append( void *pData ) = 0; + + /** Inserts an item at the specified position in the list. The + * new item takes the index that you specify, and all other items + * are moved up one position. The size of the list is increased by + * one. + *@param pData The value to insert into the list. + *@param nPos Where to insert the data into the list. + *@author Mike Buland + */ + virtual void insertBefore( void *pData, int nPos = 0 ) = 0; + + /** Determines the size of the list, in elements. + *@returns The size of the list. + *@author Mike Buland + */ + virtual int getSize( ) = 0; + + /** Determines if the list is empty or not. + *@returns True if the list is empty, or false if the list has + * data in it (if the size is greater than zero). + *@author Mike Buland + */ + virtual bool isEmpty( ) = 0; + + /** Deletes an item at the specified index and moves all other + * values down one index. The size of the list is decreased by one. + *@param nIndex The index of the item to delete. + *@author Mike Buland + */ + virtual void deleteAt( int nIndex ) = 0; + + /** Completely empties the list, and sets the effective size to + * zero. + *@author Mike Buland + */ + virtual void empty() = 0; + + /** Sets the size of the list. This can be larger or smaller + * than what it was previously. If larger, new blank items will + * be added to the end of the list. If smaller than the old list + * items will be deleted from the end. + *@param nNewSize The new size of the list. + *@author Mike Buland + */ + virtual void setSize( int nNewSize ) = 0; + + /** Sets a member at a specified location to a new value. + * If the member being set is outside of the range of the + * current list it should be expanded. + *@param nIndex The zero-based index of the item to change. + *@param pData The new value for that index. + *@author Mike Buland + */ + virtual void setAt( int nIndex, void *pData ) = 0; + + /** Makes the List work like an array. 
Just say listObj[2] to get + * the third element. + *@param nIndex The index to access in the list. + *@returns A pointer to the data at element index. + *@author Mike Buland + */ + void *operator[]( int nIndex ) { return getAt( nIndex ); }; +}; + +#endif + diff --git a/src/md5.cpp b/src/md5.cpp new file mode 100644 index 0000000..ed7e4ac --- /dev/null +++ b/src/md5.cpp @@ -0,0 +1,190 @@ +#include +#include +#include +#include "md5.h" + +// This is a fun macro that tells us where the length char goes after the data +// section in the padded data segment. It's short for OBfuscation LOCaction. +#define OBLOC(len) ((((len + 64) >> 9) << 4) + 14) +// This performs a wrapping bitwise shift, kinda' fun! + +#define bit_roll( num, cnt ) \ + (((num) << (cnt)) | (((num) >> (32 - (cnt))) & ~(-1<<(cnt)))) + +//#define md5_cmn( q, a, b, x, s, t ) (bit_roll((a + q + x + t), s) + b) + +// The following are handy wrappers for the cmn function +#define md5_ff( a, b, c, d, x, s, t ) \ + (md5_cmn((b & c) | ((~b) & d), a, b, x, s, t)) + +#define md5_gg( a, b, c, d, x, s, t ) \ + (md5_cmn((b & d) | (c & (~d)), a, b, x, s, t)) + +#define md5_hh( a, b, c, d, x, s, t ) \ + (md5_cmn(b ^ c ^ d, a, b, x, s, t)) + +#define md5_ii( a, b, c, d, x, s, t ) \ + (md5_cmn(c ^ (b | (~d)), a, b, x, s, t)) + +inline long md5_cmn( long q, long a, long b, long x, long s, long t ) +{ + return bit_roll((a + q + x + t), s) + b; +} + +md5::md5() +{ +} + +md5::~md5() +{ +} + +/* + * Calculate the MD5 of an array of little-endian words, and a bit length + */ +void md5::core_md5( long *x, long len, md5sum *output ) +{ + long a = 1732584193, olda; + long b = -271733879, oldb; + long c = -1732584194, oldc; + long d = 271733878, oldd; + + for( long i = 0; i < len; i += 16 ) + { + olda = a; + oldb = b; + oldc = c; + oldd = d; + + a = md5_ff(a, b, c, d, x[i+ 0], 7 , -680876936); + d = md5_ff(d, a, b, c, x[i+ 1], 12, -389564586); + c = md5_ff(c, d, a, b, x[i+ 2], 17, 606105819); + b = md5_ff(b, c, d, a, x[i+ 3], 22, 
-1044525330); + a = md5_ff(a, b, c, d, x[i+ 4], 7 , -176418897); + d = md5_ff(d, a, b, c, x[i+ 5], 12, 1200080426); + c = md5_ff(c, d, a, b, x[i+ 6], 17, -1473231341); + b = md5_ff(b, c, d, a, x[i+ 7], 22, -45705983); + a = md5_ff(a, b, c, d, x[i+ 8], 7 , 1770035416); + d = md5_ff(d, a, b, c, x[i+ 9], 12, -1958414417); + c = md5_ff(c, d, a, b, x[i+10], 17, -42063); + b = md5_ff(b, c, d, a, x[i+11], 22, -1990404162); + a = md5_ff(a, b, c, d, x[i+12], 7 , 1804603682); + d = md5_ff(d, a, b, c, x[i+13], 12, -40341101); + c = md5_ff(c, d, a, b, x[i+14], 17, -1502002290); + b = md5_ff(b, c, d, a, x[i+15], 22, 1236535329); + + a = md5_gg(a, b, c, d, x[i+ 1], 5 , -165796510); + d = md5_gg(d, a, b, c, x[i+ 6], 9 , -1069501632); + c = md5_gg(c, d, a, b, x[i+11], 14, 643717713); + b = md5_gg(b, c, d, a, x[i+ 0], 20, -373897302); + a = md5_gg(a, b, c, d, x[i+ 5], 5 , -701558691); + d = md5_gg(d, a, b, c, x[i+10], 9 , 38016083); + c = md5_gg(c, d, a, b, x[i+15], 14, -660478335); + b = md5_gg(b, c, d, a, x[i+ 4], 20, -405537848); + a = md5_gg(a, b, c, d, x[i+ 9], 5 , 568446438); + d = md5_gg(d, a, b, c, x[i+14], 9 , -1019803690); + c = md5_gg(c, d, a, b, x[i+ 3], 14, -187363961); + b = md5_gg(b, c, d, a, x[i+ 8], 20, 1163531501); + a = md5_gg(a, b, c, d, x[i+13], 5 , -1444681467); + d = md5_gg(d, a, b, c, x[i+ 2], 9 , -51403784); + c = md5_gg(c, d, a, b, x[i+ 7], 14, 1735328473); + b = md5_gg(b, c, d, a, x[i+12], 20, -1926607734); + + a = md5_hh(a, b, c, d, x[i+ 5], 4 , -378558); + d = md5_hh(d, a, b, c, x[i+ 8], 11, -2022574463); + c = md5_hh(c, d, a, b, x[i+11], 16, 1839030562); + b = md5_hh(b, c, d, a, x[i+14], 23, -35309556); + a = md5_hh(a, b, c, d, x[i+ 1], 4 , -1530992060); + d = md5_hh(d, a, b, c, x[i+ 4], 11, 1272893353); + c = md5_hh(c, d, a, b, x[i+ 7], 16, -155497632); + b = md5_hh(b, c, d, a, x[i+10], 23, -1094730640); + a = md5_hh(a, b, c, d, x[i+13], 4 , 681279174); + d = md5_hh(d, a, b, c, x[i+ 0], 11, -358537222); + c = md5_hh(c, d, a, b, x[i+ 3], 16, 
-722521979); + b = md5_hh(b, c, d, a, x[i+ 6], 23, 76029189); + a = md5_hh(a, b, c, d, x[i+ 9], 4 , -640364487); + d = md5_hh(d, a, b, c, x[i+12], 11, -421815835); + c = md5_hh(c, d, a, b, x[i+15], 16, 530742520); + b = md5_hh(b, c, d, a, x[i+ 2], 23, -995338651); + + a = md5_ii(a, b, c, d, x[i+ 0], 6 , -198630844); + d = md5_ii(d, a, b, c, x[i+ 7], 10, 1126891415); + c = md5_ii(c, d, a, b, x[i+14], 15, -1416354905); + b = md5_ii(b, c, d, a, x[i+ 5], 21, -57434055); + a = md5_ii(a, b, c, d, x[i+12], 6 , 1700485571); + d = md5_ii(d, a, b, c, x[i+ 3], 10, -1894986606); + c = md5_ii(c, d, a, b, x[i+10], 15, -1051523); + b = md5_ii(b, c, d, a, x[i+ 1], 21, -2054922799); + a = md5_ii(a, b, c, d, x[i+ 8], 6 , 1873313359); + d = md5_ii(d, a, b, c, x[i+15], 10, -30611744); + c = md5_ii(c, d, a, b, x[i+ 6], 15, -1560198380); + b = md5_ii(b, c, d, a, x[i+13], 21, 1309151649); + a = md5_ii(a, b, c, d, x[i+ 4], 6 , -145523070); + d = md5_ii(d, a, b, c, x[i+11], 10, -1120210379); + c = md5_ii(c, d, a, b, x[i+ 2], 15, 718787259); + b = md5_ii(b, c, d, a, x[i+ 9], 21, -343485551); + + a = a + olda; + b = b + oldb; + c = c + oldc; + d = d + oldd; + } + + output->data[0] = a; + output->data[1] = b; + output->data[2] = c; + output->data[3] = d; + delete[] x; +} + +long *md5::c2l( const char *str, long len, long *nNewLen ) +{ + long len8 = len*8; + long mlen = OBLOC( len8 ); + long flen = (((mlen/16)+((mlen%16)?(1):(0))))*16; + long *aBin = new long[flen]; + memset( aBin, 0, flen * sizeof(long) ); /* flen counts longs, memset wants a byte count */ + + for( long i = 0; i < len8; i+=8 ) + { + aBin[i>>5] |= ((long)(unsigned char)str[i/8]) << (i%32); /* via unsigned char so bytes >= 0x80 do not sign-extend over the rest of the word */ + } + + aBin[len8 >> 5] |= 0x80 << ((len8) % 32); + aBin[OBLOC( len8 )] = len8; + + (*nNewLen) = flen; + + return aBin; +} + +void md5::l2hexstr( long *binarray, char *str ) +{ + static const char hex_tab[] = {"0123456789abcdef"}; + //static char str[33]; + + int k = 0; + for( int i = 0; i < 16; i++) + { + str[k++] = hex_tab[(binarray[i>>2] >> ((i%4)*8+4)) & 0xF]; + str[k++] = hex_tab[(binarray[i>>2] >> ((i%4)*8 )) &
0xF]; + } +} + +void md5::sumString( md5sum *pSum, const char *sStr ) +{ + sumData( pSum, sStr, strlen( sStr ) ); +} + +void md5::sumData( md5sum *pSum, const char *aData, long nLen ) +{ + long nNewLen; + long *aOb = c2l( aData, nLen, &nNewLen ); + core_md5( aOb, nNewLen, pSum ); +} + +void md5::sumToHex( md5sum *pSum, char *sHex ) +{ + l2hexstr( pSum->data, sHex ); +} + diff --git a/src/md5.h b/src/md5.h new file mode 100644 index 0000000..810345e --- /dev/null +++ b/src/md5.h @@ -0,0 +1,81 @@ +#ifndef MD5_H +#define MD5_H + +/** + * Used to store an MD5 sum in a handy container. + */ +typedef struct +{ + /** The actual data-storage for an MD5 sum. */ + long data[4]; +} md5sum; + +/** + * Class for easily calculating MD5 sums of just about any data. + *@author Mike Buland + */ +class md5 +{ +public: + /** Build an MD5 sum builder. */ + md5(); + + /** Deconstruct */ + ~md5(); + + /** + * Create a sum of a standard c string, null terminated. This is probably + * the easiest function to use. + *@param pSum The MD5 sum structure to fill up. + *@param sStr The null-terminated string to turn into an MD5 sum. + */ + void sumString( md5sum *pSum, const char *sStr ); + + /** + * Create a sum of an array of arbitrary data. This is the most handy for + * dealing with files and so on. + *@param pSum The MD5 sum structure to fill up. + *@param aData A pointer to the base of the data to sum. + *@param nLen The number of bytes to use in the sum. + */ + void sumData( md5sum *pSum, const char *aData, long nLen ); + + /** + * Convert an md5sum to standard hex representation. Make sure that sHex + * has room for at least 32 characters; no terminating NUL is written. + *@param pSum The sum structure to convert to hex. + *@param sHex The string to store the hex value in. + */ + void sumToHex( md5sum *pSum, char *sHex ); + +private: + /** + * Do the bulk of the work of the md5 algorithm. + *@param x The padded message as an array of words (built by c2l); core_md5 frees it with delete[]. + *@param len The number of words in x; they are processed 16 at a time.
+ *@param output The sum structure to put the output in. + */ + void core_md5( long *x, long len, md5sum *output ); + + /** + * Convert an array of charaters to an array of longs in a very crafty way. + * This also applies standard MD5 obfuscation to the resulting array, and + * makes it fit within MD5 size constraints. + *@param str The data to convert. + *@param len The length of the data. + *@param nNewLen A pointer to a variable that will hold the new length of + * the resulting array of longs. + *@returns The newly obfuscated and resized long array. + */ + long *c2l( const char *str, long len, long *nNewLen ); + + /** + * Backend helper to convert an array of longs into a hex string. + *@param binarray The binary data to convert. + *@param str The string to store the hex string in. + */ + void l2hexstr( long *binarray, char *str ); + +}; + +#endif diff --git a/src/multilog.cpp b/src/multilog.cpp new file mode 100644 index 0000000..64ff967 --- /dev/null +++ b/src/multilog.cpp @@ -0,0 +1,143 @@ +/*************************************************************************** + multilog.cpp - description + ------------------- + begin : Sat Sep 6 2003 + copyright : (C) 2003 by Mike Buland + email : eichlan@yf-soft.com + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include "multilog.h" +#include +#include +#include +#include + +#include "multilogchannel.h" + +// This section is what we need to make this a singleton +// this makes this class easy to use from anywhere, without +// worrying about re-creating every output form and all of that crazy jazz +MultiLog *MultiLog::singleLog = NULL; + +MultiLog *MultiLog::getLog() +{ + if( singleLog == NULL ) + { + singleLog = new MultiLog; + atexit( cleanup ); + } + return singleLog; +} + +void MultiLog::cleanup() +{ + if( singleLog != NULL ) + { + delete singleLog; + singleLog = NULL; + } +} + +MultiLog::MultiLog() +{ + lChannel = new LinkedList(); + rEntry = new RingList( 150 ); + nEntriesLost = 0; +} + +MultiLog::~MultiLog() +{ + int nMax = lChannel->getSize(); + for( int j = 0; j < nMax; j++ ) + { + ((MultiLogChannel *)lChannel->getAt(j))->closeLog(); + delete ((MultiLogChannel *)lChannel->getAt(j)); + } + delete lChannel; + + for( int j = 0; j < rEntry->getSize(); j++ ) + { + delete (LogEntry *)rEntry->getAt( j ); + } + delete rEntry; +} +/* +void MultiLog::Log( int nLevel, const char *lpFormat, ...) +{ + switch( nLevel ) + { + default: + break; + } + va_list ap; + va_start(ap, lpFormat); + + vprintf( lpFormat, ap ); + + va_end(ap); +}*/ + +void MultiLog::DetailLog( int nLevel, const char *lpFile, int nLine, const char *lpFunction, const char *lpFormat, ...) 
+{ + LogEntry *e = new LogEntry(); + + va_list ap; + va_start(ap, lpFormat); + char *text; + vasprintf( &text, lpFormat, ap ); + va_end(ap); + + time( &e->xTime ); + e->nLevel = nLevel; + e->nLine = nLine; + e->lpFile = new char[strlen(lpFile)+1]; + strcpy( e->lpFile, lpFile ); + e->lpText = new char[strlen(text)+1]; + strcpy( e->lpText, text ); + free( text ); + + append( e ); +} + +void MultiLog::append( LogEntry *pEntry ) +{ + rEntry->append( pEntry ); + if( rEntry->getPushBuf() ) + { + delete (LogEntry *)rEntry->getPushBuf(); + nEntriesLost++; + } + + for( int j = 0; j < lChannel->getSize(); j++ ) + { + ((MultiLogChannel *)lChannel->getAt( j ))->append( pEntry ); + } +} + +void MultiLog::addChannel( MultiLogChannel *pChannel ) +{ + lChannel->append( pChannel ); + + pChannel->openLog(); + + for( int j = 0; j < rEntry->getSize(); j++ ) + { + pChannel->append( (LogEntry *)rEntry->getAt( j ) ); + } +} + +MultiLog::LogEntry::~LogEntry() +{ + delete[] lpFile; + delete[] lpText; +} + diff --git a/src/multilog.h b/src/multilog.h new file mode 100644 index 0000000..30ad8d7 --- /dev/null +++ b/src/multilog.h @@ -0,0 +1,145 @@ +#ifndef MULTILOG_H +#define MULTILOG_H + +#include +#include +#include + +#include "ringlist.h" +#include "linkedlist.h" + +/** + * Calls the DetailLog function but includes pre-processor macros to fill in + * most of the fields for you. This makes your life a lot easier, and makes the + * log useful for system debugging as well as just letting people know what's + * going on. + *@param LEVEL The log level, comes from an enum in the MultiLog class. + *@param FORMAT The text to store in the log, using printf style formatting. + *@param ... Parameters to help format the text in the FROMAT param. + */ +#define LineLog( LEVEL, FORMAT, ...) DetailLog( LEVEL, __FILE__, __LINE__, __PRETTY_FUNCTION__, FORMAT, ##__VA_ARGS__ ) + +/** MultiLog keeps track of logfile info in a myriad of varieties, and is + * easily configurable between them all. 
It allows output to the standard + * output, error output, files, networks, and streams, which includes memory + * buffers. + * MultiLog uses the singleton pattern to keep only a single instance of + * the log. Instead of instantiating a new copy, call the getLog method. + *@author Mike Buland + */ +class MultiLog +{ +public: + /** + * Keeps track of a single log entry, in a standard format, that can be + * processed by any MultiLogChannel derrived class. + *@author Mike Buland + */ + typedef struct LogEntry + { + /** Safely delete a log entry. */ + ~LogEntry(); + time_t xTime; /**< The time the log entry was made. */ + int nLevel; /**< The log-level of the entry. */ + char *lpFile; /**< The name of the file this entry came from. */ + int nLine; /**< The line number that this log came from. */ + char *lpText; /**< The text content of this log entry. */ + } LogEntry; + +private: + /** + * Private constructor, this ensures that this is a singleton. + */ + MultiLog(); + + /** + * The only instance of MultiLog ever. + */ + static MultiLog *singleLog; + + /** + * Append a new logentry to the log list, possibly pushing an old entry off. + *@param pEntry The new entry to append. + */ + void append( LogEntry *pEntry ); + + /** + * The actual log entry storage mechanism. + */ + RingList *rEntry; + + /** + * The number of entries that have rolled off the end of the RingList. + */ + unsigned long nEntriesLost; + + /** + * A list of all channels that are registered with the MultiLog. + */ + LinkedList *lChannel; + +public: + /** + * Destroy the multilog. + *@todo Why is this public? Does it need to be? + */ + ~MultiLog(); + + /** Sends info to the logfile. + *@param nLevel The type of data being logged (error, info, etc.) + *@param lpFormat The data to send to the log. 
+ *@author Mike Buland + */ + //void Log( int nLevel, const char *lpFormat, ...); + + /** Sends info to the logfile with extra information, including the files + * that it was called from and the line in the code. Besides that, it's + * exactly the same as Log. Please use the LineLog macro to make DetailLog + * really easy to use. It operates exacly like Log, but inserts the + * builtin macros as the lpFile and nLine parameters. + *@param nLevel The type of data being logged (error, info, etc.) + *@param lpFile The name of the file that called the log function. + *@param nLine The line in the file that this log function was called from. + *@param lpFunction The name of the function that called the log function. + *@param lpFormat The data to send to the log. + *@author Mike Buland + */ + void DetailLog( int nLevel, const char *lpFile, int nLine, const char *lpFunction, const char *lpFormat, ...); + + /** Gets a pointer to the only instantion of the MultiLog that can exist. + * If there is no instantion in existance, it creates one, so it's + * foolproof. + *@returns A pointer to the only MultiLog instantion. + *@author Mike Buland + */ + static MultiLog *getLog(); + + /** Performs standard cleanup and deletes the only instantiation of MultiLog + * that can exist. This is just the same as delete and will nicely close + * all open logs. always call this when you are done with your MultiLog. + */ + static void cleanup(); + + /** + * Adds a logging channel to the MultiLog channel chain. Every added + * channel will automatically receive a complete log of everything that + * happened before the channel was added as well as all future messages. + *@param pChannel A pointer to the pre-contructed channel object to add. + */ + void addChannel( class MultiLogChannel *pChannel ); + + /** The various pre-defined levels available to use when logging. + * The person logging can make up their own, just make sure to remember + * which value is which (all levels are integers). 
+ *@author Mike Buland + */ + enum Levels + { + LError, + LWarning, + LInfo, + LStatus + }; +}; + +#endif diff --git a/src/multilogchannel.cpp b/src/multilogchannel.cpp new file mode 100644 index 0000000..ee4c9bf --- /dev/null +++ b/src/multilogchannel.cpp @@ -0,0 +1,13 @@ +// +// C++ Implementation: multilogchannel +// +// Description: +// +// +// Author: Mike Buland , (C) 2005 +// +// Copyright: See COPYING file that comes with this distribution +// +// +#include "multilogchannel.h" + diff --git a/src/multilogchannel.h b/src/multilogchannel.h new file mode 100644 index 0000000..d891a65 --- /dev/null +++ b/src/multilogchannel.h @@ -0,0 +1,46 @@ +#ifndef MULTILOGCHANNEL_H +#define MULTILOGCHANNEL_H + +#include "multilog.h" + +/** + * The baseclass for any MultiLog output channel. Any class that implements + * all of these functions can be put in the log chain and will be sent + * messages from active MultiLoggers. + *@author Mike Buland + */ +class MultiLogChannel +{ +public: + /** + * Deconstruct a MultiLogChannel. + */ + virtual ~MultiLogChannel() {}; + + /** + * Should perform any operations that need to take place in order to start + * the output of data into this channel. This will be called once by the + * MultiLog when the MultiLogChannel is registered. + *@returns True means that everything can go as planned. False means that + * the MultiLog should remove this channel from the list and delete it. + */ + virtual bool openLog() = 0; + + /** + * Should append a log entry to the long, by whatever means are necesarry. + *@param pEntry The LogEntry to append. + *@returns True means that everything can go as planned. False means that + * the MultiLog should remove this channel from the list and delete it. + */ + virtual bool append( MultiLog::LogEntry *pEntry ) = 0; + + /** + * Should perform any operations that need to take place in order to safely + * close and cleanup the log. + *@returns True means that everything can go as planned. 
False means that + * the MultiLog should remove this channel from the list and delete it. + */ + virtual bool closeLog() = 0; +}; + +#endif diff --git a/src/multilogtext.cpp b/src/multilogtext.cpp new file mode 100644 index 0000000..be64595 --- /dev/null +++ b/src/multilogtext.cpp @@ -0,0 +1,152 @@ + +#include +#include +#include +#include +#include +#include +#include "multilogtext.h" + +MultiLogText::MultiLogText( const char *sFileName, const char *lpFormat ) +{ + this->lpFormat = NULL; + nFD = open( sFileName, O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH ); + setLogFormat( lpFormat ); +} + +MultiLogText::MultiLogText( int nFileDesc, const char *lpFormat ) +{ + this->lpFormat = NULL; + nFD = nFileDesc; + setLogFormat( lpFormat ); +} + +MultiLogText::~MultiLogText() +{ + if( nFD != -1 ) + { + close( nFD ); + } + + delete[] lpFormat; +} + +bool MultiLogText::setLogFormat( const char *lpFormat ) +{ + char buf[200]; + int k = 0; + static char fmts[10][4]={ + {'y', 'd', '0', '1'}, + {'m', 'd', '0', '2'}, + {'d', 'd', '0', '3'}, + {'h', 'd', '0', '4'}, + {'M', 'd', '0', '5'}, + {'s', 'd', '0', '6'}, + {'l', 'd', '0', '7'}, + {'f', 's', '0', '8'}, + {'L', 'd', '0', '9'}, + {'t', 's', '1', '0'}, + }; + + for( int j = 0; lpFormat[j] != '\0'; j++ ) + { + if( lpFormat[j] == '%' ) + { + buf[k++] = '%'; + int nPlace = k++; + k++; + buf[k++] = '$'; + bool bDone = false; + for( j++; bDone == false; j++ ) + { + int l; + for( l = 0; l < 10; l++ ) + { + if( lpFormat[j] == fmts[l][0] ) + { + buf[nPlace] = fmts[l][2]; + buf[nPlace+1] = fmts[l][3]; + buf[k++] = fmts[l][1]; + bDone = true; + break; + } + } + if( l == 10 ) + { + buf[k++] = lpFormat[j]; + } + } + j--; + } + else + { + buf[k++] = lpFormat[j]; + } + } + buf[k++] = '\n'; + buf[k] = '\0'; + + if( this->lpFormat != NULL ) + { + delete[] this->lpFormat; + } + this->lpFormat = new char[k+1]; + strcpy( this->lpFormat, buf ); + + return true; +} + +bool MultiLogText::openLog() +{ + if( nFD == -1 ) + { + return 
false; + } + return true; +} + +bool MultiLogText::append( MultiLog::LogEntry *pEntry ) +{ + if( nFD == -1 ) + { + return false; + } + + char *line = NULL; + struct tm *pTime; + pTime = localtime( &pEntry->xTime ); + asprintf( + &line, + lpFormat, + pTime->tm_year+1900, + pTime->tm_mon+1, + pTime->tm_mday, + pTime->tm_hour, + pTime->tm_min, + pTime->tm_sec, + pEntry->nLevel, + pEntry->lpFile, + pEntry->nLine, + pEntry->lpText + ); + write( nFD, line, strlen(line) ); + free( line ); + + return true; +} + +bool MultiLogText::closeLog() +{ + if( nFD == -1 ) + { + return false; + } + // Don't close it if it's sdtout or errorout + if( nFD > 2 ) + { + close( nFD ); + } + nFD = -1; + return true; +} + diff --git a/src/multilogtext.h b/src/multilogtext.h new file mode 100644 index 0000000..aa32405 --- /dev/null +++ b/src/multilogtext.h @@ -0,0 +1,70 @@ +#ifndef MULTILOGTEXT_H +#define MULTILOGTEXT_H + +#include "multilogchannel.h" + +/** + * Simple MultiLogChannel that takes the logdata, formats it textually, and + * writes it to a text device, either a file or the screen, yay! This takes + * the place of the old standard logging facility. + * The entries in the format follow the standard printf % style, and are as + * follows: + *
    + *
  • %y - current year
  • + *
  • %m - current month
  • + *
  • %d - current day
  • + *
  • %h - current hour (24-hour format)
  • + *
  • %M - current minute
  • + *
  • %s - current second
  • + *
  • %l - Loglevel (numerical)
  • + *
  • %f - Filename
  • + *
  • %L - Line number
  • + *
  • %t - Full text of the log entry
  • + *
+ *@author Mike Buland + */ +class MultiLogText : public MultiLogChannel +{ +public: + /** + * Construct a MultiLogText object around a specific filename and format. + * The file named by sFileName is opened write-only, created if missing, + * and truncated, so any existing data in it is destroyed. + *@param sFileName The file to output log-data to. + *@param lpFormat The format using the above specifications to be used for + * every log entry. + */ + MultiLogText( const char *sFileName, const char *lpFormat ); + + /** + * Construct a MultiLogText object around a specific file and format. + * The file descriptor passed in should describe an already opened and set- + * up file or device. This could easily be a socket or stdout or stderr. + *@param nFileDesc The already opened descriptor to send data to. + *@param lpFormat The format using the above specifications to be used for + * every log entry. + */ + MultiLogText( int nFileDesc, const char *lpFormat ); + + /** + * Destroy the object. + */ + ~MultiLogText(); + + bool openLog(); /**< Reports whether a usable descriptor is held (false when nFD is -1). */ + bool append( MultiLog::LogEntry *pEntry ); /**< Formats pEntry with the stored format and writes it to the descriptor; false when nFD is -1. */ + bool closeLog(); /**< Closes the descriptor unless it is 0, 1 or 2, then marks it closed; false if it was already closed. */ + + /** + * Change the log format on the fly. + *@param lpFormat The new format to use for all future log entries. + *@returns True; the current implementation has no failure path. + */ + bool setLogFormat( const char *lpFormat ); + +private: + int nFD; /**< The file descriptor we're writing to. */ + char *lpFormat; /**< The format that we're using, converted for printf. */ +}; + +#endif diff --git a/src/pproc.cpp b/src/pproc.cpp new file mode 100644 index 0000000..f5cb869 --- /dev/null +++ b/src/pproc.cpp @@ -0,0 +1,60 @@ +#include +#include +#include "pproc.h" + +void processParams( int argc, char *argv[], PPROC *pproc ) +{ + // Loop over all the params except the first, no params, no looping!
+ for( int j = 1; j < argc; j++ ) + { + //printf("Param[%d]: \"%s\"\n", j, argv[j] ); + if( argv[j][0] == '-' ) + { + if( argv[j][1] == '-' ) + { + // Proccess a long-word param string + for( int k = 0; + pproc[k].proc != NULL || pproc[k].stateVar != NULL; + k++ ) + { + if( !strcmp( pproc[k].lpWord, &argv[j][2] ) ) + { + if( pproc[k].proc != NULL ) + { + j += pproc[k].proc( argc-j, &argv[j] ); + } + if( pproc[k].stateVar != NULL ) + { + (*(pproc[k].stateVar)) = pproc[k].bSetState; + } + } + } + } + else + { + // Process a one char param string + for( int k = 0; + pproc[k].proc != NULL || pproc[k].stateVar != NULL; + k++ ) + { + if( pproc[k].cChar == argv[j][1] ) + { + if( pproc[k].proc != NULL ) + { + j += pproc[k].proc( argc-j, &argv[j] ); + } + if( pproc[k].stateVar != NULL ) + { + (*(pproc[k].stateVar)) = pproc[k].bSetState; + } + } + } + } + } + else + { + // Handle generic params here. + } + } +} + diff --git a/src/pproc.h b/src/pproc.h new file mode 100644 index 0000000..bf5063c --- /dev/null +++ b/src/pproc.h @@ -0,0 +1,35 @@ +#ifndef PPROC_H_ +#define PPROC_H_ + +/** + * Contains all required info to handle a single program parameter. + *@author Mike Buland + */ +typedef struct PPROC +{ + const char *lpWord; /**< The full text-word to use as a param. */ + const char cChar; /**< The short char version of the param. */ + /** + * Pointer to the function to call when this param is triggered. + *@param argc The number of params after and including the one that + * triggered this call. + *@param argv The array of commandline tokens to use as parameters. + *@returns 0 for everything is ok. A number greater than zero signals that + * this parameter function used n parameters and they should be skipped by + * the processParams function. + */ + int (*proc)( int argc, char *argv[] ); + bool *stateVar; /**< A pointer to a bool to be setwhen this is triggered */ + bool bSetState; /**< The state to set the above bool to. 
*/ +} PPROC; + +/** + * Process command line parameters based on a null-terminated array of PPROC + * structures. + *@param argc Should come straight from your main function's argc. + *@param argv Should come straight from your main function's argv. + *@param pproc The array of params that this function can respond to. + */ +void processParams( int argc, char *argv[], PPROC *pproc ); + +#endif /*PPROC_H_*/ diff --git a/src/pqueue.cpp b/src/pqueue.cpp new file mode 100644 index 0000000..1f0b8b5 --- /dev/null +++ b/src/pqueue.cpp @@ -0,0 +1,33 @@ +#include "pqueue.h" + +PQueue::PQueue( int nNewNumQueues ) +{ + nNumQueues = nNewNumQueues; + aQueue = new Queue[nNumQueues]; +} + +PQueue::~PQueue() +{ + delete[] aQueue; +} + +void PQueue::enqueue( void *pData, int nQueueLevel ) +{ + if( nQueueLevel < 0 || nQueueLevel >= nNumQueues ) + return; + + aQueue[nQueueLevel].enqueue( pData ); +} + +void *PQueue::dequeue() +{ + for( int j = 0; j < nNumQueues; j++ ) + { + if( aQueue[j].isEmpty() == false ) + { + return aQueue[j].dequeue(); + } + } + + return NULL; +} diff --git a/src/pqueue.h b/src/pqueue.h new file mode 100644 index 0000000..1b45f75 --- /dev/null +++ b/src/pqueue.h @@ -0,0 +1,48 @@ +#ifndef PQUEUE_H +#define PQUEUE_H + +#include "queue.h" + +/** Priority queue. This is just like a queue, but something with a higher + * priority will always come off the queue before something with a lower + * priority, even if it's added after. Otherwise works just like a queue. + *@author Mike Buland + */ +class PQueue +{ +public: + /** Create a queue with any number of different priorities. + *@param nNewNumQueues The number of queues, the default is 3 + */ + PQueue( int nNewNumQueues=3 ); + + /** + * Cleanup all contained queues. + */ + ~PQueue(); + + /** Add a new item to the queue at the specified priority. A lower + * number means a higher priority! 
+ *@param pData A pointer to the data to add to the queue + *@param nQueueLevel The priority to set the new data to + */ + void enqueue( void *pData, int nQueueLevel ); + + /** Pull the next item off the queue, high priority first. + *@returns A pointer to the data that was next in the priority queue + */ + void *dequeue(); + +private: + /** + * The queues we use for real data storage. + */ + Queue *aQueue; + + /** + * The number of priorities or queus that we need. + */ + int nNumQueues; +}; + +#endif diff --git a/src/programchain.cpp b/src/programchain.cpp new file mode 100644 index 0000000..4e53ac8 --- /dev/null +++ b/src/programchain.cpp @@ -0,0 +1,113 @@ +/*************************************************************************** + programchain.cpp - description + ------------------- + begin : Sat Sep 6 2003 + copyright : (C) 2003 by Mike Buland + email : eichlan@yf-soft.com + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include +#include "programchain.h" + +ProgramChain::ProgramChain() +{ + pLog = MultiLog::getLog(); + pLog->LineLog( MultiLog::LStatus, "Program Chain Initialized." 
); +} + +ProgramChain::~ProgramChain() +{ +} + +bool ProgramChain::addLink( ProgramLink *pLink ) +{ + if( pLink->init() == false ) + { + emergencyShutdown(); + return false; + } + + lLink.append( pLink ); + + pLink->setChain( this ); + + return true; +} + +ProgramLink *ProgramChain::getLink( const char *lpName ) +{ + char a; + a = lpName[0]; + return NULL; +} + +ProgramLink *ProgramChain::getBaseLink() +{ + return NULL; +} + +bool ProgramChain::execChainOnce() +{ + int nLen = lLink.getSize(); + for( int j = 0; j < nLen; j++ ) + { + if( ((ProgramLink *)lLink[j])->timeSlice() == false ) + { + pLog->LineLog( MultiLog::LInfo, "Shutting down due to signal from link #%d", j ); + emergencyShutdown(); + return false; + } + } + + return true; +} + +bool ProgramChain::enterChainLoop() +{ + for(;;) + { + if( execChainOnce() == false ) + { + return false; + } + } + + return true; +} + +void ProgramChain::emergencyShutdown() +{ + int nLen = lLink.getSize(); + for( int j = 0; j < nLen; j++ ) + { + ((ProgramLink *)lLink[j])->deInit(); + delete (ProgramLink *)lLink[j]; + } + lLink.empty(); +} + +LinkMessage *ProgramChain::broadcastIRM( LinkMessage *pMsgOut, ProgramLink *pSender ) +{ + int nLen = lLink.getSize(); + for( int j = 0; j < nLen; j++ ) + { + LinkMessage *pMsg = ((ProgramLink *)lLink[j])->processIRM( pMsgOut ); + if( pMsg != NULL ) + { + delete pMsgOut; + return pMsg; + } + } + + delete pMsgOut; + return NULL; +} diff --git a/src/programchain.h b/src/programchain.h new file mode 100644 index 0000000..34f64f8 --- /dev/null +++ b/src/programchain.h @@ -0,0 +1,88 @@ +#ifndef PROGRAMCHAIN_H +#define PROGRAMCHAIN_H + +#include "linkedlist.h" +#include "multilog.h" +#include "programlink.h" + +/** The Program Chain links together program "chunks" to more easily facilitate + * a generalized program loop with modular extensions. + *@author Mike Buland + */ +class ProgramChain +{ +public: + /** + * Construct an empty chain. + */ + ProgramChain(); + + /** + * Destroy your chain. 
+ */ + ~ProgramChain(); + + /** Adds a link to the end of the chain. + *@param pLink A pointer to the link to add to the chain. + *@returns True if adding the link was successful, otherwise false + *@author Mike Buland + */ + bool addLink( ProgramLink *pLink ); + + /** Gets a link by name. + *@param lpName The name of the link you're looking for. Every link has a + * name, apparently. + *@returns A pointer to the specified ProgramLink, or NULL if none were found + * matching your criteria. Note that the current implementation is a stub + * that always returns NULL. + *@author Mike Buland + */ + class ProgramLink *getLink( const char *lpName ); + + /** Gets the very first link in the chain. + *@returns A pointer to the first link in the chain. Note that the current + * implementation is a stub that always returns NULL. + *@author Mike Buland + */ + class ProgramLink *getBaseLink(); + + /** Runs through the chain once. Useful if you want to have more control over + * the operation of the chain. + *@returns true if every link returned true. If at least one link returns false, + * then returns false. + *@author Mike Buland + */ + bool execChainOnce(); + + /** Enters the master chain loop, looping over the entire chain and executing + * every link's TimeSlice routine in order, over and over, until a link returns + * a false value. + *@returns False, always. The loop only ends once a link has returned false, + * so there is no path on which this returns true. + *@author Mike Buland + **/ + bool enterChainLoop(); + + /** Broadcasts an Immediate Response Message to all active links, save the + * sender. Whatever link first responds with a non-null response message + * will have its message sent back to the broadcasting link as the return + * value of this function call. Therefore it is very important that all message + * processing code is handled in a fairly timely fashion. + *@param pMsgOut The message to broadcast in hopes of a response. + *@param pSender The link that sent out the message and doesn't want to + * receive its own message. This should always just be "this". 
+ *@returns The message that was returned by the first link to return a + * non-null response. If all messages return null responses then this also + * returns null. Please note that whoever calls this will be responsible + * for deleting the message returned by it, if non-null. + */ + class LinkMessage *broadcastIRM( LinkMessage *pMsgOut, ProgramLink *pSender ); + +private: + /** + * Shuts down all operation no matter what point in the operation we were. + */ + void emergencyShutdown(); + MultiLog *pLog; /**< A pointer to a log. */ + LinkedList lLink; /**< The linked list that contains all of the links. */ +}; + +#endif diff --git a/src/programlink.cpp b/src/programlink.cpp new file mode 100644 index 0000000..de13be8 --- /dev/null +++ b/src/programlink.cpp @@ -0,0 +1,71 @@ +/*************************************************************************** + programlink.cpp - description + ------------------- + begin : Sat Sep 6 2003 + copyright : (C) 2003 by Mike Buland + email : eichlan@yf-soft.com + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + ***************************************************************************/ + +#include "programlink.h" +#include "programchain.h" + +ProgramLink::ProgramLink() +{ +} + +ProgramLink::~ProgramLink() +{ +} + +LinkMessage *ProgramLink::sendIRM( LinkMessage *pMsgOut ) +{ + return pChain->broadcastIRM( pMsgOut, this ); +} + +void ProgramLink::setChain( ProgramChain *pNewChain ) +{ + pChain = pNewChain; +} + +/* +void ProgramLink::postMessage( LinkMessage *pMsg, int nLvl ) +{ + if( nLvl == msgToChain ) + { + qMsgToChain.enqueue( pMsg ); + } + else if( nLvl == msgToLink ) + { + qMsgToLink.enqueue( pMsg ); + } + else + { + // ERROR! + } +} + +LinkMessage *ProgramLink::getMessage( int nLvl ) +{ + if( nLvl == msgToChain ) + { + return (LinkMessage *)qMsgToChain.dequeue(); + } + else if( nLvl == msgToLink ) + { + return (LinkMessage *)qMsgToLink.dequeue(); + } + else + { + // ERROR! + } +} +*/ diff --git a/src/programlink.h b/src/programlink.h new file mode 100644 index 0000000..6499fc2 --- /dev/null +++ b/src/programlink.h @@ -0,0 +1,99 @@ +#ifndef PROGRAMLINK_H +#define PROGRAMLINK_H + +class ProgramLink; +#include "queue.h" +#include "linkmessage.h" +#include "programchain.h" + +/** + * Program Link is the base class for any object that will be a piece of the + * main program chain loop. + *@author Mike Buland + */ +class ProgramLink +{ +friend class ProgramChain; +public: + /** + * Construct a program link. + */ + ProgramLink(); + + /** + * Deconstruct. + */ + virtual ~ProgramLink(); + + /** + * Initialization code required for a link that wasn't performed in the + * constructor. + *@returns true if initialization was successful. A false value will halt + * the chain. + */ + virtual bool init()=0; + + /** + * DeInitialization code that should happen, but doesn't belong in the + * destructor. + *@returns true means everything worked, false means failure, but is + * meaningless. + */ + virtual bool deInit()=0; + + /** + * Executed once per link per chain pass. 
Contains the guts of the program. + *@returns true if everything went well. A false value will halt the chain. + */ + virtual bool timeSlice()=0; + + /** + * This must be handled in order to process Immediate Response Messages + * (IRMs). This function should return null on all messages that it doesn't + * understand how to handle, and construct new messages to return to sender + * in the cases where it does understand. + *@param pMsgIn The message that must be processed. + *@returns Either a new message in cases where a response is required, + * or null if nothing needs to be done by this link. + */ + virtual LinkMessage *processIRM( LinkMessage *pMsgIn ) = 0; + + /** + * Broadcast a LinkMessage to all other links in the system. Each other + * link will get a call of their processIRM function. If the message gets + * a response then you will regain control immediately, otherwise the system + * will give all other Links a chance to respond before returning NULL. + *@param pMsgOut The message to broadcast. The chain deletes it before this + * call returns, so do not use or delete it afterwards. + *@returns The message response, or NULL if no Link understood your message. + */ + LinkMessage *sendIRM( LinkMessage *pMsgOut ); + +private: + /** + * Set which chain we're associated with. This is how IRM messages make + * it out to the rest of the world. + *@param pNewChain A pointer to the containing program chain. + */ + void setChain( class ProgramChain *pNewChain ); + + /** + * The pointer to the containing chain. 
+ */ + class ProgramChain *pChain; +/* + void postMessage( LinkMessage *pMsg, int nLvl ); + LinkMessage *getMessage( int nLvl ); + + enum + { + msgToChain, + msgToLink + }; + +private: + Queue qMsgToChain; + Queue qMsgToLink; +*/ +}; + +#endif diff --git a/src/protocol.cpp b/src/protocol.cpp new file mode 100644 index 0000000..1b2621f --- /dev/null +++ b/src/protocol.cpp @@ -0,0 +1,31 @@ +/*************************************************************************** + * Copyright (C) 2003 by Mike Buland * + * eichlan@yf-soft.com * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + ***************************************************************************/ +#include "protocol.h" + +Protocol::Protocol() +{ + pConnection = NULL; +} + +Protocol::~Protocol() +{ +} + +void Protocol::setConnection( Connection *pNewConnection ) +{ + pConnection = pNewConnection; + + onNewConnection(); +} + +Connection *Protocol::getConnection() +{ + return pConnection; +} diff --git a/src/protocol.h b/src/protocol.h new file mode 100644 index 0000000..cd18e37 --- /dev/null +++ b/src/protocol.h @@ -0,0 +1,58 @@ +#ifndef PROTOCOL_H +#define PROTOCOL_H + +#include "connection.h" + +/** This is the template for a class that handles specialized input and output + * to connections of different types with different protocols. + *@author Mike Buland + */ +class Protocol +{ +public: + /** Constructor */ + Protocol(); + /** Deconstructor */ + virtual ~Protocol(); + + /** + * Function is called every time there is new data on the line. This is + * called directly from the Connection class to process data. This is not + * called whever there is pending data on the input, but every time new data + * is added to the input buffer. 
+ *@returns True if processing went alright, false if something went wrong, + * I suppose. In truth this value is thrown away right now. + *@todo Either make a return value of false mean something, or make these + * void. + */ + virtual bool onNewData()=0; + + /** + * Function is called when there is a new connection. This should only + * happen once per Protocol object, but gives each protocol object a + * chance to perform connection handshaking and initialization at a point + * where they know that they have a handle to an active Connection. + *@returns See onNewData + */ + virtual bool onNewConnection()=0; + + /** + * Sets the Protocol's Connection object. This is rather important, and + * handled usually by the ConnectionManager. + *@param pNewConnection The Connection object that this protocol will use to + * deal with the outside world. + */ + void setConnection( class Connection *pNewConnection ); + + /** + * Get a pointer to this object's Connection object, or NULL if one was + * never set. If used with the ConnectionManager that should never happen. + *@returns A pointer to the active Connection. + */ + Connection *getConnection(); + +private: + class Connection *pConnection; /**< The pointer to the Connection. 
*/ +}; + +#endif diff --git a/src/protocoltelnet.cpp b/src/protocoltelnet.cpp new file mode 100644 index 0000000..7beea5b --- /dev/null +++ b/src/protocoltelnet.cpp @@ -0,0 +1,315 @@ +#include "protocoltelnet.h" +#include + +ProtocolTelnet::ProtocolTelnet() +{ + nTermType = termUnInited; + bEchoOn = true; +} + +ProtocolTelnet::~ProtocolTelnet() +{ +} + +bool ProtocolTelnet::onNewConnection() +{ + Connection *pCon = getConnection(); + + pCon->appendOutput( (char)IAC ); + pCon->appendOutput( (char)WILL ); + pCon->appendOutput( (char)SUPPRESSGA ); + + pCon->appendOutput( (char)IAC ); + pCon->appendOutput( (char)DO ); + pCon->appendOutput( (char)SUPPRESSGA ); + + pCon->appendOutput( (char)IAC ); + pCon->appendOutput( (char)DONT ); + pCon->appendOutput( (char)TERMTYPE ); + +// pCon->appendOutput( IAC ); +// pCon->appendOutput( SB ); +// pCon->appendOutput( TERMTYPE ); +// pCon->appendOutput( 1 ); +// pCon->appendOutput( IAC ); +// pCon->appendOutput( SE ); + + pCon->appendOutput( (char)IAC ); + pCon->appendOutput( (char)DONT ); + pCon->appendOutput( (char)ECHO ); + + pCon->appendOutput( (char)IAC ); + pCon->appendOutput( (char)WILL ); + pCon->appendOutput( (char)ECHO ); + +// 255(IAC),251(WILL),3 +} + +bool ProtocolTelnet::onNewData() +{ + Connection *pCon = getConnection(); + if( !pCon->hasInput() ) + { + return true; + } + + int nInSize = pCon->getInputAmnt(); + char *lpInStr = (char *)pCon->getInput(); + + // Here we interpret the basic commands and un-encapsulate them, so to + // speak. We'll allow this, even if the terminal is in raw mode, we + // just won't send anything in response... 
+ for( int j = 0; j < nInSize; j++ ) + { + switch( (unsigned char)lpInStr[j] ) + { + case '\r': + fbEdited.appendData('\n'); + if( bEchoOn ) pCon->appendOutput("\n\r"); + break; + + case '\n': + break; + + case '\177': // backspace + if( fbEdited.getLength() > 0 ) + { + fbEdited.usedData( -1 ); // Delete one char from the end + if( bEchoOn ) pCon->appendOutput(ESC "[D"); // Move the cursor back one + if( bEchoOn ) pCon->appendOutput(ESC "[P"); // Delete one character + } + break; + + case '\x1B': // escape sequence + if( (unsigned char)lpInStr[j+1] == '[' ) + { + switch( (unsigned char)lpInStr[j+2] ) + { + case 'A': // Up + break; + + case 'B': // Down + break; + + case 'C': // Right + break; + + case 'D': // Left + break; + } + j+=2; + } + break; + + case 0: // NOP: No operation + break; + + case IAC: // IAC: Interpret as command + switch( lpInStr[j+1] ) + { + case SE: // SE: End of subnegotiation parameters. + break; + + case NOP: // NOP: No operation + break; + + case DM: // DM: Data mark. Indicates the position of a Synch event within the data stream. This should always be accompanied by a TCP urgent notification. + break; + + case BRK: // BRK: Break. Indicates that the "break" or "attention" key was hit. + break; + + case IP: // IP: Suspend, interrupt or abort the process to which the NVT is connected. + break; + + case AO: // AO: Abort output. Allows the current process to run to completion but do not send its output to the user. + break; + + case AYT: // AYT: Are you there. Send back to the NVT some visible evidence that the AYT was received. + break; + + case EC: // EC: Erase character. The receiver should delete the last preceding undeleted character from the data stream. + break; + + case EL: // EL: Erase line. Delete characters from the data stream back to but not including the previous CRLF. + break; + + case GA: // GA: Go ahead. Used, under certain circumstances, to tell the other end that it can transmit. 
+ break; + + case SB: // SB: Subnegotiation of the indicated option follows. + switch( lpInStr[j+2] ) + { + case TERMTYPE: + if( lpInStr[j+3] == 0 ) + { + for( int k = 0; j+4+k < nInSize; k++ ) + { + if( (unsigned char)lpInStr[j+4+k] == IAC && + (unsigned char)lpInStr[j+5+k] == SE ) + { + lpInStr[j+4+k] = 0; + //@TODO: Do something with the term type... + printf("Term type: %s\n", &lpInStr[j+4] ); + j += 5+k; + } + } + } + else + { + } + break; + + default: + //printf("unknown subnegotiation parameters! (%d)\n", lpInStr[j+2] ); + break; + } + break; + + case WILL: // WILL: Indicates the desire to begin performing + switch( lpInStr[j+2] ) + { + case SUPPRESSGA: + j += 2; +// pCon->usedInput( 3 ); + break; + + case TERMTYPE: + j += 2; +// pCon->usedInput( 3 ); + break; + + case ECHO: + j += 2; +// pCon->usedInput( 3 ); + break; + + case NAWS: + default: + pCon->appendOutput( (char)ESC[0] ); + pCon->appendOutput( (char)DONT ); + pCon->appendOutput( lpInStr[j+2] ); + //printf("unknown will command used! (%d)\n", lpInStr[j+2] ); + j += 2; + break; + } + break; + + case WONT: // WONT: Indicates the refusal to perform + switch( lpInStr[j+2] ) + { + case ECHO: + j += 2; +// pCon->usedInput( 3 ); + break; + + default: + //printf("unknown wont command used! (%d)\n", lpInStr[j+2] ); + j += 2; + break; + } + break; + + case DO: // DO: Indicates the request that the other party perform + switch( lpInStr[j+2] ) + { + case ECHO: + j += 2; + break; + + case SUPPRESSGA: + j += 2; + break; + + default: + pCon->appendOutput( (char)ESC[0] ); + pCon->appendOutput( (char)DONT ); + pCon->appendOutput( lpInStr[j+2] ); + //printf("unknown do command used! (%d)\n", lpInStr[j+2] ); + j += 2; + break; + } +// pCon->usedInput( 3 ); + break; + + case DONT: // DONT: Indicates the demand that the other party stop performing + switch( lpInStr[j+2] ) + { + case ECHO: + j += 2; +// pCon->usedInput( 3 ); + break; + + default: + printf("unknown dont command used! 
(%d)\n", lpInStr[j+2] ); + j += 2; + break; + } + break; + } + break; + + default: + fbEdited.appendData( lpInStr[j] ); + if( bEchoOn ) pCon->appendOutput( lpInStr[j] ); + break; + } + } + + pCon->usedInput( pCon->getInputAmnt() ); + + return true; +} + +char *ProtocolTelnet::getLine( bool bFullOnly ) +{ + int i = fbEdited.findChar('\n'); + + if( i < 0 ) + { + if( bFullOnly == false ) + { + i = fbEdited.getLength(); + } + else + { + return NULL; + } + } + + char *lpStr = new char[i+1]; + strncpy( lpStr, fbEdited.getData(), i ); + lpStr[i] = '\0'; + + fbEdited.usedData( i+1 ); + + return lpStr; +} + +char *ProtocolTelnet::peekLine( bool bFullOnly ) +{ + int i = fbEdited.findChar('\n'); + + if( i < 0 ) + { + if( bFullOnly == false ) + { + i = fbEdited.getLength(); + } + else + { + return NULL; + } + } + + char *lpStr = new char[i+1]; + strncpy( lpStr, fbEdited.getData(), i ); + lpStr[i] = '\0'; + + return lpStr; +} + +void ProtocolTelnet::setEcho( bool bEchoOn ) +{ + this->bEchoOn = bEchoOn; +} diff --git a/src/protocoltelnet.h b/src/protocoltelnet.h new file mode 100644 index 0000000..4b2fb32 --- /dev/null +++ b/src/protocoltelnet.h @@ -0,0 +1,77 @@ +#ifndef PROTOCOLTELNET_H +#define PROTOCOLTELNET_H + +#include "protocol.h" +#include "flexbuf.h" + +#define ESC "\x1B" /**< A telnet escape code. */ + +/** Handles all specialized protocol actions related to the telnet protocol. + * This includes setting modes, non-scrollable regions, and so on. + *@author Mike Buland + */ +class ProtocolTelnet : public Protocol +{ +public: + ProtocolTelnet(); + ~ProtocolTelnet(); + + bool onNewData(); + bool onNewConnection(); + + char *getLine( bool bFullOnly = true ); + char *peekLine( bool bFullOnly = true ); + + void setEcho( bool bEchoOn = true ); + + enum + { + termUnInited, + termRaw, + termUnknown, + termVT220, + termXTerm + }; + + enum + { + SE = 240, // SE: End of subnegotiation parameters. + NOP = 241, // NOP: No operation + DM = 242, // DM: Data mark. 
Indicates the position of a Synch event within the data stream. This should always be accompanied by a TCP urgent notification. + BRK = 243, // BRK: Break. Indicates that the "break" or "attention" key was hit. + IP = 244, // IP: Suspend, interrupt or abort the process to which the NVT is connected. + AO = 245, // AO: Abort output. Allows the current process to run to completion but do not send its output to the user. + AYT = 246, // AYT: Are you there. Send back to the NVT some visible evidence that the AYT was received. + EC = 247, // EC: Erase character. The receiver should delete the last preceding undeleted character from the data stream. + EL = 248, // EL: Erase line. Delete characters from the data stream back to but not including the previous CRLF. + GA = 249, // GA: Go ahead. Used, under certain circumstances, to tell the other end that it can transmit. + SB = 250, // SB: Subnegotiation of the indicated option follows. + WILL = 251, // WILL: Indicates the desire to begin performing, or confirmation that you are now performing, the indicated option. + WONT = 252, // WONT: Indicates the refusal to perform, or continue performing, the indicated option. + DO = 253, // DO: Indicates the request that the other party perform, or confirmation that you are expecting the other party to perform, the indicated option. + DONT = 254, // DONT: Indicates the demand that the other party stop performing, or confirmation that you are no longer expecting the other party to perform, the indicated option. 
+ IAC = 255 // IAC: Interpret as command + }; + + enum + { + ECHO = 1, // Explain who'll echo + SUPPRESSGA = 3, // Suppress Go Ahead + TERMTYPE = 24, // Terminal Type + NAWS = 31, // Window size + TERMSPEED = 32, // Terminal Speed + LINEMODE = 34 // Linemode + }; + +private: + int nTermType; + + int nTermWidth; + int nTermHeight; + + FlexBuf fbEdited; + + bool bEchoOn; +}; + +#endif diff --git a/src/queue.cpp b/src/queue.cpp new file mode 100644 index 0000000..42999fe --- /dev/null +++ b/src/queue.cpp @@ -0,0 +1,26 @@ +#include "queue.h" + +void Queue::enqueue( void *data ) +{ + lQueueData.append( data ); +} + +/** + * Remove and return the item at the front of the queue. + *@returns The front item, or NULL if the queue is empty. + */ +void *Queue::dequeue() +{ + // Decide on emptiness, not on the stored value, so that a queued NULL + // pointer is still removed instead of blocking the queue forever. + if( lQueueData.isEmpty() ) + { + return NULL; + } + + void *dat = lQueueData[0]; + lQueueData.deleteAt( 0 ); + return dat; +} + +bool Queue::isEmpty() +{ + return lQueueData.isEmpty(); +} + +void Queue::empty() +{ + lQueueData.empty(); +} diff --git a/src/queue.h b/src/queue.h new file mode 100644 index 0000000..692f5d8 --- /dev/null +++ b/src/queue.h @@ -0,0 +1,45 @@ +#ifndef QUEUE_H +#define QUEUE_H +#include "linkedlist.h" + +/** + * An ultra-simple queue implementation. It just uses a linked list as its + * container so we don't have to worry about anything! + *@author Mike Buland + */ +class Queue +{ +public: + /** + * Puts a new item at the end of the queue. + *@param data A new value to put at the end of the queue. + */ + void enqueue( void *data ); + + /** + * Takes the item at the beginning of the queue off and returns it. + *@returns The value at the front of the queue, or NULL if the queue is + * empty. + */ + void *dequeue(); + + /** + * Checks if the queue is empty. + *@returns True if the queue is empty, and false if it has things in it. + */ + bool isEmpty(); + + /** + * Empty the queue. + */ + void empty(); + + /** + * Get a pointer to the internal list object. + *@returns A pointer to the internal list object. + */ + LinkedList *getList() { return &lQueueData; }; + +private: + LinkedList lQueueData; /**< Where all of the real data is stored. 
*/ +}; +#endif diff --git a/src/ringlist.cpp b/src/ringlist.cpp new file mode 100644 index 0000000..9efbbc4 --- /dev/null +++ b/src/ringlist.cpp @@ -0,0 +1,106 @@ +// +// C++ Implementation: ringlist +// +// Description: +// +// +// Author: Mike Buland , (C) 2005 +// +// Copyright: See COPYING file that comes with this distribution +// +// +#include + +#include "ringlist.h" + +RingList::RingList( int nInitSize ) + : List() +{ + nFirstIndex = 0; + nRealLength = nInitSize; + nDataLength = 0; + apData = new void*[nInitSize]; + pPushBuf = NULL; +} + +RingList::~RingList() +{ + delete[] apData; +} + +void *RingList::getAt( int nIndex ) +{ + if( nIndex < 0 || nIndex >= nDataLength ) + { + return NULL; + } + + return apData[(nFirstIndex+nIndex)%nRealLength]; +} + +void RingList::append( void *pData ) +{ + int nIndex = (nFirstIndex+nDataLength)%nRealLength; + + pPushBuf = apData[nIndex]; + apData[nIndex] = pData; + + if( nDataLength == nRealLength ) + { + nFirstIndex = (nFirstIndex+1)%nRealLength; + } + else + { + nDataLength++; + // We really didn't need it this time... + pPushBuf = NULL; + } +} + +void RingList::insertBefore( void *pData, int nPos ) +{ + // Not implemented right now, don't even try it! 
+} + +int RingList::getSize() +{ + return nDataLength; +} + +bool RingList::isEmpty() +{ + return nDataLength==0; +} + +void RingList::deleteAt( int nIndex ) +{ + // Also not implemented yet +} + +void RingList::empty() +{ + nFirstIndex = 0; + nDataLength = 0; +} + +void RingList::setSize( int nNewSize ) +{ + if( apData ) + { + delete[] apData; + } + nFirstIndex = 0; + nRealLength = nNewSize; + nDataLength = 0; + apData = new void*[nNewSize]; +} + +void RingList::setAt( int nIndex, void *pData ) +{ + apData[(nIndex+nFirstIndex)%nRealLength] = pData; +} + +void *RingList::getPushBuf() +{ + return pPushBuf; +} diff --git a/src/ringlist.h b/src/ringlist.h new file mode 100644 index 0000000..1a4d3a9 --- /dev/null +++ b/src/ringlist.h @@ -0,0 +1,112 @@ +#ifndef RINGLIST_H +#define RINGLIST_H + +#include "list.h" + +/** + * A RingList or Ring Buffer implementation. This is a list that never grows in + * size once it is created, but instead once it is full new items added to the + * RingList replace the oldest items and the zero-index is virtually shifted. + * Since no data is actually moved when zero-index moves, this is very + * efficient. + *
+ * The items removed are not actually deleted by the RingList, so instead they + * are first moved into a temporary "Push Buffer" that can be accessed so that + * elements pushed off the edge of the RingList can be accessed for cleanup. + *@author Mike Buland + */ +class RingList : public List +{ +public: + /** + * Construct a RingList with a fixed initial size. This size never changes + * unless setSize is called later during normal operation. + *@param nInitSize The number of elements to allocate. + */ + RingList( int nInitSize ); + + /** + * Clean up the data structures, but not the contained elements. + */ + ~RingList(); + + /** + * Get an element at the specified index. + *@param nIndex The index of the element to retrieve. + *@returns A pointer to the requested element, or NULL if nIndex is outside + * the range of elements stored so far. + */ + void *getAt( int nIndex ); + + /** + * Append an element to the end of the list, overwriting the beginning if + * necessary. + *@param pData The pointer to append to the RingList. + */ + void append( void *pData ); + + /** + * Insert an element before another in the RingList, pushing all after it + * down the list. Not implemented yet: the current implementation does + * nothing. + *@param pData The data to insert. + *@param nPos The position that the new element should occupy in the list. + */ + void insertBefore( void *pData, int nPos = 0 ); + + /** + * Get the size of the array. + *@returns The number of elements currently stored, not the allocated + * capacity. + */ + int getSize(); + + /** + * Is the RingList empty? + *@returns True if it is empty, false otherwise. + */ + bool isEmpty(); + + /** + * Delete an element in the list, moving all later elements down one index. + * Not implemented yet: the current implementation does nothing. + *@param nIndex The index of the element to delete. + */ + void deleteAt( int nIndex ); + + /** + * Remove all elements from the RingList. + */ + void empty(); + + /** + * Set a new size for the RingList. Be careful with this one, if shrinking + * this may quietly create a memory leak. The storage is reallocated and + * every stored pointer is dropped (not deleted), whatever the new size. + *@param nNewSize The new size to set the RingList to. 
+ *@todo Either fix this memory leak somehow or remove this function. + */ + void setSize( int nNewSize ); + + /** + * Set a specific element to a new value. + *@param nIndex The zero-based index to change the value of. + *@param pData The data to put at the location specified by nIndex. + */ + void setAt( int nIndex, void *pData ); + + /** + * Retrieve the contents of the push buffer. This is the data that is + * pushed off the end of the array if you append data and the list is full. + * This should be checked after every append operation to be sure there + * isn't anything that needs deleting. + *@returns The last value pushed off the RingList, or NULL if nothing was + * pushed off. + */ + void *getPushBuf(); + +private: + int nFirstIndex; /**< The index to be translated as zero. */ + int nRealLength; /**< The Amount of storage space available. */ + int nDataLength; /**< The number of elements filled in. */ + void **apData; /**< The actual data storage. */ + void *pPushBuf; /**< The push buffer. */ + +}; + +#endif diff --git a/src/stack.cpp b/src/stack.cpp new file mode 100644 index 0000000..8d9565c --- /dev/null +++ b/src/stack.cpp @@ -0,0 +1,33 @@ +#include "stack.h" + +void Stack::push( void *data ) +{ + lStackData.append( data ); +} + +void *Stack::top() +{ + return lStackData.getAt( lStackData.getSize()-1 ); +} + +void Stack::pop() +{ + lStackData.deleteAt( lStackData.getSize()-1 ); +} + +void *Stack::poptop() +{ + void *dat = top(); + pop(); + return dat; +} + +bool Stack::isEmpty() +{ + return lStackData.isEmpty(); +} + +void Stack::empty() +{ + lStackData.empty(); +} diff --git a/src/stack.h b/src/stack.h new file mode 100644 index 0000000..30e2a19 --- /dev/null +++ b/src/stack.h @@ -0,0 +1,50 @@ +#ifndef STACK_H +#define STACK_H +#include "linkedlist.h" + +/** An ultra-simple stack implementation that just uses a linked list. + *@author Mike Buland + */ +class Stack +{ +public: + /** Pushes a new value onto the top of the stack. 
+ *@param data A new value for the stack. + *@author Mike Buland + */ + void push( void *data ); + + /** Returns the top value off of the stack, but doesn't remove it from the + * stack. + *@returns The value at the top of the stack. + *@author Mike Buland + */ + void *top(); + + /** Pops the top item off of the stack. + *@author Mike Buland + */ + void pop(); + + /** Gets the top item off of the stack, pops it off the stack, and returns + * it. + *@returns The value at the top of the stack. + *@author Mike Buland + */ + void *poptop(); + + /** Checks if the stack is empty. + *@returns True if the stack is empty, and false if it has things in it. + *@author Mike Buland + */ + bool isEmpty(); + + /** Empty the stack. + *@author Mike Buland + */ + void empty(); + +private: + LinkedList lStackData; /**< The actual stack data. */ +}; +#endif diff --git a/src/test/hashtest.cpp b/src/test/hashtest.cpp new file mode 100644 index 0000000..f31a3f8 --- /dev/null +++ b/src/test/hashtest.cpp @@ -0,0 +1,107 @@ +#include +#include +#include "hashtable.h" +#include "hashfunctioncasestring.h" + +int main() +{ + const char *names[]={ + "Homer the Great", + "And Maggie Makes Three", + "Bart's Comet", + "Homie The Clown", + "Bart Vs Australia", + "Homer vs Patty and Selma", + "A star is burns", + "Lisa's Wedding", + "Two Dozen and One Greyhounds", + "The PTA Disbands", + "Round Springfield", + "The Springfield connection", + "Lemon of Troy", + "Who Shot Mr. Burns (Pt. 1)", + "Who Shot Mr. Burns (pt. 
2)", + "Radioactive Man", + "Home Sweet Homediddly-dum-doodly", + "Bart Sells His Soul", + "Lisa the Vegetarian", + "Treehouse of horror VI", + "King Size Homer", + "Mother Simpson", + "Sideshow Bob's Last Gleaming", + "The Simpson's 138th Show Spectacular", + "Marge Be Not Proud", + "Team Homer", + "Two Bad Neighbors", + "Scenes From the Class Struggle in Springfield", + "Bart the Fink", + "Lisa the Iconoclast", + "Homer the Smithers", + "The Day the Violence Died", + "A Fish Called Selma", + "Bart on the road", + "22 Short Films about Springfield", + "The Curse of the Flying Hellfish", + "Much Apu about Nothing", + "Homerpalooza", + "The Summer of 4 Ft 2", + "Treehouse of Horror VII", + "You Only Move Twice", + "The Homer They Fall", + "Burns Baby Burns", + "Bart After Dark", + "A Millhouse Divided", + "Lisas Date With Destiny", + "Hurricane Neddy", + "The Mysterious Voyage of Our Homer", + "The Springfield Files", + "The Twisted World of Marge Simpson", + "Mountain of Madness", + NULL + }; + + HashTable h( new HashFunctionCaseString(), 5, false ); + + int j; + printf("Inserting...\n"); + for( j = 0; j < 10; j++ ) + { + h.insert( names[j], (void *)(j+1) ); + h.insert( names[j], (void *)(j+1) ); + printf("Capacity: %d, Size: %d, Load: %f\n", + h.getCapacity(), + h.getSize(), + h.getLoad() + ); + } + + for( j = 0; j < 10; j++ ) + { + printf("\"%s\" = %d\n", names[j], (int)h[names[j]] ); + } + + printf("\nDeleting some...\n"); + + for( int k = 0; k < 7; k++ ) + { + h.del( names[k] ); + //h.insert( names[j], (void *)(j+1) ); + printf("Capacity: %d, Size: %d, Load: %f\n", + h.getCapacity(), + h.getSize(), + h.getLoad() + ); + } + + printf("\nInserting more...\n"); + + for( ; names[j] != NULL; j++ ) + { + h.insert( names[j], (void *)(j+1) ); + printf("Capacity: %d, Size: %d, Load: %f\n", + h.getCapacity(), + h.getSize(), + h.getLoad() + ); + } +} diff --git a/src/test/httpsrv/httpconnectionmonitor.cpp b/src/test/httpsrv/httpconnectionmonitor.cpp new file mode 100644 
index 0000000..4eb6817 --- /dev/null +++ b/src/test/httpsrv/httpconnectionmonitor.cpp @@ -0,0 +1,72 @@ +#include "httpconnectionmonitor.h" +#include "http.h" +#include + +HttpConnectionMonitor::HttpConnectionMonitor() +{ +} + +HttpConnectionMonitor::~HttpConnectionMonitor() +{ +} + +bool HttpConnectionMonitor::onNewConnection( Connection *pCon ) +{ + Http hp( pCon ); + + pCon->readInput( 60, 0 ); + printf("#######################\n%s\n#######################\n", pCon->getInput() ); + + while( hp.parseRequest() == false ); + printf("Done parsing.\n\n"); + + if( hp.getRequestType() == Http::reqGet ) + { + printf("\"\"\"%s\"\"\"\n", hp.getRequestURI() ); + if( !strcmp( hp.getRequestURI(), "/" ) ) + { + std::string content("Server Test</test></head><body>This is a test of a new system where all the pages will be more or less dynamic...<br>If you want to try to login, you can do that here:<br><form method=\"post\" action=\"showvars\" enctype=\"multipart/form-data\">Name: <input type=\"text\" name=\"name\"><br>Password: <input type=\"password\" name=\"pass\"><br><input type=\"submit\" name=\"action\" value=\"login\"></form></body></html>"); + hp.buildResponse(); + hp.setResponseContent( + "text/html", + content.c_str(), + content.size() + ); + hp.sendResponse(); + } + else + { + std::string content("<html><head><title>URL Not Found</test></head><body>There is no content mapped to the URL you requested. 
Please try another one.</body></html>");
+			hp.buildResponse( 404, "File not found.");
+			hp.setResponseContent(
+				"text/html",
+				content.c_str(),
+				content.size()
+				);
+			hp.sendResponse();
+		}
+	}
+	else
+	{
+		printf("Non get: %s\n", hp.getRequestTypeStr() );
+	}
+	pCon->writeOutput();
+
+	if( pCon->hasInput() )
+	{
+		std::string s( pCon->getInput(), pCon->getInputAmnt() );
+
+		printf("Reamining data\n==============\n%s\n==============\n",
+			s.c_str() );
+	}
+
+	pCon->disconnect();
+
+	return true;
+}
+
+bool HttpConnectionMonitor::onClosedConnection( Connection *pCon )
+{
+	return true;
+}
+
diff --git a/src/test/httpsrv/httpconnectionmonitor.h b/src/test/httpsrv/httpconnectionmonitor.h
new file mode 100644
index 0000000..63f29e4
--- /dev/null
+++ b/src/test/httpsrv/httpconnectionmonitor.h
@@ -0,0 +1,16 @@
+#ifndef HTTPCONNECTIONMONITOR_H
+#define HTTPCONNECTIONMONITOR_H
+
+#include "connectionmonitor.h"
+
+class HttpConnectionMonitor : public ConnectionMonitor
+{
+public:
+	HttpConnectionMonitor();
+	~HttpConnectionMonitor();
+
+	bool onNewConnection( Connection *pCon );
+	bool onClosedConnection( Connection *pCon );
+};
+
+#endif
diff --git a/src/test/httpsrv/main.cpp b/src/test/httpsrv/main.cpp
new file mode 100644
index 0000000..4ee1ad3
--- /dev/null
+++ b/src/test/httpsrv/main.cpp
@@ -0,0 +1,21 @@
+#include "connectionmanager.h"
+#include "httpconnectionmonitor.h"
+
+int main()
+{
+	printf("Starting server...\n");
+
+	ConnectionManager srv;
+	HttpConnectionMonitor http;
+
+	srv.setConnectionMonitor( &http );
+
+	srv.startServer( 7331, 40 );
+
+	for(;;)
+	{
+		srv.scanConnections( 5000, false );
+	}
+
+	return 0;
+}
diff --git a/src/test/md5test.cpp b/src/test/md5test.cpp
new file mode 100644
index 0000000..6f832df
--- /dev/null
+++ b/src/test/md5test.cpp
@@ -0,0 +1,19 @@
+#include <stdio.h>
+#include <string.h>
+#include "md5.h"
+
+int main()
+{
+	md5 mproc;
+	md5sum sum;
+	char hexstr[33];
+
+	memset( hexstr, 0, 33 );
+
+	mproc.sumString( &sum, "qwertyuiopasdfgh" );
+	mproc.sumToHex( &sum, hexstr );
+	printf("sum: %s\n", hexstr );
+	printf("chk: 1ebfc043d8880b758b13ddc8aa1638ef\n");
+
+	return 0;
+}
diff --git a/src/test/teltest/main.cpp b/src/test/teltest/main.cpp
new file mode 100644
index 0000000..ce968c4
--- /dev/null
+++ b/src/test/teltest/main.cpp
@@ -0,0 +1,21 @@
+#include "connectionmanager.h"
+#include "telnetmonitor.h"
+
+int main()
+{
+	printf("Starting server...\n");
+
+	ConnectionManager srv;
+	TelnetMonitor telnet;
+
+	srv.setConnectionMonitor( &telnet );
+
+	srv.startServer( 4001, 40 );
+
+	for(;;)
+	{
+		srv.scanConnections( 5000, false );
+	}
+
+	return 0;
+}
diff --git a/src/test/teltest/telnetmonitor.cpp b/src/test/teltest/telnetmonitor.cpp
new file mode 100644
index 0000000..001932f
--- /dev/null
+++ b/src/test/teltest/telnetmonitor.cpp
@@ -0,0 +1,54 @@
+#include "telnetmonitor.h"
+#include "protocoltelnet.h"
+#include <sys/stat.h>
+
+TelnetMonitor::TelnetMonitor()
+{
+}
+
+TelnetMonitor::~TelnetMonitor()
+{
+}
+
+bool TelnetMonitor::init()
+{
+	return true;
+}
+
+bool TelnetMonitor::deInit()
+{
+	return true;
+}
+
+bool TelnetMonitor::timeSlice()
+{
+	for( int j = 0; j < lCon.getSize(); j++ ) // every entry of lCon is read back as a Connection*
+	{
+		if( ((Connection *)lCon[j])->hasInput() )
+		{
+			printf("%s\n", ((Connection *)lCon[j])->getInput() );
+		}
+	}
+	return true;
+}
+
+LinkMessage* TelnetMonitor::processIRM( LinkMessage *pMsg )
+{
+	return NULL; // no message is produced; flowing off the end of a non-void function is undefined
+}
+
+bool TelnetMonitor::onNewConnection( Connection *pCon )
+{
+	ProtocolTelnet *pt = new ProtocolTelnet();
+	pCon->setProtocol( pt );
+
+	lCon.append( pCon ); // store the Connection itself: timeSlice() casts list entries to Connection*
+
+	return true;
+}
+
+bool TelnetMonitor::onClosedConnection( Connection *pCon )
+{
+	return true;
+}
+
diff --git a/src/test/teltest/telnetmonitor.h b/src/test/teltest/telnetmonitor.h
new file mode 100644
index 0000000..95c8493
--- /dev/null
+++ b/src/test/teltest/telnetmonitor.h
@@ -0,0 +1,26 @@
+#ifndef TELNETMONITOR_H
+#define TELNETMONITOR_H
+
+#include "connectionmonitor.h"
+#include "programlink.h"
+#include 
"linkedlist.h" + +class TelnetMonitor : public ConnectionMonitor, public ProgramLink +{ +public: + TelnetMonitor(); + ~TelnetMonitor(); + + bool init(); + bool deInit(); + bool timeSlice(); + LinkMessage* processIRM( LinkMessage *pMsgIn ); + + bool onNewConnection( Connection *pCon ); + bool onClosedConnection( Connection *pCon ); + +private: + LinkedList lCon; +}; + +#endif diff --git a/src/test/xmlreadtest.cpp b/src/test/xmlreadtest.cpp new file mode 100644 index 0000000..5fbd021 --- /dev/null +++ b/src/test/xmlreadtest.cpp @@ -0,0 +1,29 @@ +#include "../xmlfilereader.h" +#include "../xmlstringreader.h" +#include "../xmlfilewriter.h" + +int main( int argc, char *argv[] ) +{ + if( argc < 4 ) + { + printf("Usage: %s f <file in> <file out>\n", argv[0] ); + printf(" %s s <xml string> <file out>\n\n", argv[0] ); + return 0; + } + + if( argv[1][0] == 'f' ) + { + XmlFileReader r( argv[2], true ); + XmlFileWriter w( argv[3], "\t", r.getRoot() ); + w.write(); + //XmlWriter::write( argv[3], r.getRoot(), "\t" ); + } + else if( argv[1][0] == 's' ) + { + XmlStringReader r( argv[2], true ); + //XmlWriter::write( argv[3], r.getRoot(), "\t" ); + } + + return 0; +} + diff --git a/src/test/xmlrepltest.cpp b/src/test/xmlrepltest.cpp new file mode 100644 index 0000000..1fe9ec2 --- /dev/null +++ b/src/test/xmlrepltest.cpp @@ -0,0 +1,31 @@ +#include "xmlwriter.h" + +int main() +{ + printf("Testing Xml Replacement...\n"); + XmlDocument w; + + w.addNode("text"); + w.setContent("this text is before the node. "); + w.addNode("keepme", "This one we keep...", true ); + w.setContent("this text is after."); + w.addNode("deleteme", "This one we don't...", true ); + w.setContent("this is last..." 
); + w.closeNode(); + + //XmlWriter::writeNode( stdout, w.getRoot(), 0, NULL ); + + printf("\n\n"); + + XmlNode *xNode = w.getRoot()->detatchNode( 1 ); + + //XmlWriter::writeNode( stdout, w.getRoot(), 0, NULL ); + + printf("\n\n"); + + //XmlWriter::writeNode( stdout, xNode, 0, NULL ); + + printf("\n\n"); + + return 0; +} diff --git a/src/test/xmlwritetest.cpp b/src/test/xmlwritetest.cpp new file mode 100644 index 0000000..340c9a3 --- /dev/null +++ b/src/test/xmlwritetest.cpp @@ -0,0 +1,41 @@ +#include "xmlfilewriter.h" +#include "xmlstringwriter.h" + +void fillItIn( XmlWriter &w ) +{ + w.addNode("thinglist"); + + w.addNode("thing"); + w.addProperty("type", "Weapon"); + + w.addNode("id", "Klophin Staff", true ); + w.addNode("name", "Klophin Staff", true ); + w.addNode("durability", "0.01", true ); + w.addNode("size", "0.1", true ); + + w.addNode("config"); + w.addNode("damage", "3d6+4", true ); + w.addNode("class", "melee", true ); + w.addNode("type", "bludgeon", true ); + w.addNode("damagedesc", "club/clubs", true ); + w.closeNode(); + + w.closeNode(); + + w.closeNode(); +} + +int main() +{ + printf("Testing XmlWriter...\n"); + XmlFileWriter wf("test.xml", "\t"); + + fillItIn( wf ); + + XmlStringWriter ws("\t"); + fillItIn( ws ); + + printf("Now the string version:\n\n%s\n", ws.getString().c_str() ); + + return 0; +} diff --git a/src/tokenstring.cpp b/src/tokenstring.cpp new file mode 100644 index 0000000..0c861ac --- /dev/null +++ b/src/tokenstring.cpp @@ -0,0 +1,172 @@ +/*************************************************************************** + * Copyright (C) 2003 by Mike Buland * + * eichlan@Xagafinelle * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + ***************************************************************************/ +#include "tokenstring.h" +#include <string.h> + +TokenString::TokenString( const char *lpNewTokenString ) +{ + lpTokenString = NULL; + if( lpNewTokenString ) + { + parseLine( lpNewTokenString ); + } +} + +TokenString::~TokenString() +{ + delete[] lpTokenString; + for( int j = 0; j < lToken.getSize(); j++ ) + { + delete[] (((Token *)lToken[j])->lpToken); + delete ((Token *)lToken[j]); + } +} + +void TokenString::parseLine( const char *lpNewTokenString ) +{ + if( lpTokenString != NULL ) + { + delete[] lpTokenString; + lpTokenString = NULL; + for( int j = 0; j < lToken.getSize(); j++ ) + { + delete[] (((Token *)lToken[j])->lpToken); + delete ((Token *)lToken[j]); + } + lToken.empty(); + } + if( lpNewTokenString == NULL ) + { + lpTokenString = new char[1]; + lpTokenString[0] = '\0'; + lToken.setSize(0); + return; + } + // First order of business, make an internal copy so someone can get it + // if they want to. + int nLen = strlen(lpNewTokenString); + lpTokenString = new char[nLen+1]; + strcpy( lpTokenString, lpNewTokenString ); + + // Now we do a preliminary parse. This could be effected by later + // editing and aliasing, but we'll see... 
+ int nTkStart, nTkEnd; + int mode=0; // 0 = startSearch, 1=endSearch + for( int j = 0; j <= nLen; j++ ) + { + if( mode == 0 ) + { + if( lpTokenString[j] != ' ' && + lpTokenString[j] != '\t' ) + { + nTkStart = j; + mode = 1; + } + } + else + { + if( lpTokenString[j] == ' ' || + lpTokenString[j] == '\t' || + lpTokenString[j] == '\0' ) + { + nTkEnd = j-1; + mode = 0; + + appendToken( nTkStart, nTkEnd ); + } + } + } +} + +void TokenString::appendToken( int nStart, int nEnd ) +{ + Token *pToken = new Token; + pToken->lpOrig = &lpTokenString[nStart]; + + // nStart and nEnd are inclusive, we must add two for the end, and the null + pToken->lpToken = new char[nEnd-nStart+2]; + memcpy( pToken->lpToken, &lpTokenString[nStart], nEnd-nStart+1 ); + pToken->lpToken[nEnd-nStart+1] = '\0'; + +// printf("%s\n", pToken->lpToken ); + lToken.append( pToken ); +} + +void TokenString::insertToken( int nStart, int nEnd, char *lpOldOrig, const char *lpNewToken, int nIndex ) +{ + Token *pToken = new Token; + pToken->lpOrig = lpOldOrig; + + // nStart and nEnd are inclusive, we must add two for the end, and the null + pToken->lpToken = new char[nEnd-nStart+2]; + memcpy( pToken->lpToken, &lpNewToken[nStart], nEnd-nStart+1 ); + pToken->lpToken[nEnd-nStart+1] = '\0'; + + lToken.insertBefore( pToken, nIndex ); +} + +int TokenString::getNumTokens() +{ + return lToken.getSize(); +} + +char *TokenString::getToken( int nIndex ) +{ + if( nIndex >= lToken.getSize() ) return NULL; + return (char *)(((Token *)lToken[nIndex])->lpToken); +} + +char *TokenString::getTokenString( int nIndex ) +{ + if( nIndex >= lToken.getSize() ) return NULL; + return (char *)(((Token *)lToken[nIndex])->lpOrig); +} + +void TokenString::expandTokenTo( int nIndex, char *lpNewToken ) +{ + // First, we delete the token at nIndex, then we keep inserting + // at that position... 
+ // We also have to remember the index to the original string, + // since most of what we're expanding to won't be in the origingal + // we need to keep these indexes updated in order to make other parts + // of the system happy. + char *lpOldOrig = ((Token *)lToken[nIndex])->lpOrig; + delete[] ((Token *)lToken[nIndex])->lpToken; + delete ((Token *)lToken[nIndex]); + lToken.deleteAt( nIndex ); + + // We'll do this just like we did above, but instead we'll + // do tricky things when we find tokens... + int nLen = strlen(lpNewToken); + int nTkStart, nTkEnd, nNewIndex=nIndex; + int mode=0; // 0 = startSearch, 1=endSearch + for( int j = 0; j <= nLen; j++ ) + { + if( mode == 0 ) + { + if( lpNewToken[j] != ' ' && lpNewToken[j] != '\t' ) + { + nTkStart = j; + mode = 1; + } + } + else + { + if( lpNewToken[j] == ' ' || lpNewToken[j] == '\t' || lpNewToken[j] == '\0' ) + { + nTkEnd = j-1; + mode = 0; + + insertToken( nTkStart, nTkEnd, lpOldOrig, lpNewToken, nNewIndex ); + nNewIndex++; + } + } + } +} diff --git a/src/tokenstring.h b/src/tokenstring.h new file mode 100644 index 0000000..25f710b --- /dev/null +++ b/src/tokenstring.h @@ -0,0 +1,120 @@ +/*************************************************************************** + * Copyright (C) 2003 by Mike Buland * + * eichlan@Xagafinelle * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + ***************************************************************************/ +#ifndef TOKENSTRING_H +#define TOKENSTRING_H + +#include "linkedlist.h" + +/** A single tokenized command line. Contains all information necesarry to + * nicely access a stand-alone command line and to perform alias expansion + * inside of that command line. 
+ * When expanding a token, the original command line is left intact, so any + * command usng a command line verbatum (getTokenString not getToken) will get + * the original, and not the expanded version. + * Since indexing into the original command line is also done by token, it + * means that using getTokenString( 0 ) will not always get you the first + * character of the command line, it will get you the first non-whitespace + * character. + * Furthermore, when expanding the expantion string is tokenized as well, + * but since the original string is unchanged, all tokens that expand any + * given index will all retain the same index into the original command line. + *@todo Update this to allow it to break on different types of token + * delimiters. + *@author Mike Buland + */ +class TokenString{ +public: + /** Automatically call parseLine when created. + *@param lpNewTokenString The command line to tokenize + *@author Mike Buland + */ + TokenString( const char *lpNewTokenString=NULL ); + ~TokenString(); + + /** Performs a tokenizing parse on the given command line, setting it as + * the internal command line for all future tokenizing (excluding + * expansion) + *@param lpNewTokenString The new command line to set to this object. + *@author Mike Buland + */ + void parseLine( const char *lpNewTokenString ); + + /** Appends a token to the list of available tokens. This references the + * internal pointer to the command line, so no token string must be + * specified. + *@param nStart The first character of the token to insert. + *@param nEnd The last character of the token to insert. + *@author Mike Buland + */ + void appendToken( int nStart, int nEnd ); + + /** Gets the number of tokens. This is particularly useful post-aliasing + * since the number of tokens may not match what is percieved from the + * original command line. + *@returns The number of available tokens. 
+ *@author Mike Buland + */ + int getNumTokens(); + + /** Gets a processed token specified by index. + *@param nIndex The index of the token to retrieve. + *@returns A pointer to the requested token. Please note that these tokens + * may not match the original command line. + *@author Mike Buland + */ + char *getToken( int nIndex ); + + /** Gets the original command line based on tokens. Use this if you want + * to perform your own processing on parts of the command line, without + * resorting to tokens. + * The first character in the returned string will always be + * non-whitespace. + *@param nIndex The index of the token to start at, zero gets you the whole + * command line. + *@returns A pointer to the internal original command line string, starting + * at the position of the first non-whitespace character of the token + * specified. + *@author Mike Buland + */ + char *getTokenString( int nIndex=0 ); + + /** Expands a token, replacing it with the string lpNewToken, but + * processing the new string for tokens before performing the replacement + *@param nIndex Which token should be replaced. + *@param lpNewToken The string to replace the token with. + *@author Mike Buland + */ + void expandTokenTo( int nIndex, char *lpNewToken ); + + /** Inserts a token at any position in the command line. This does not + * effect the original command line. + *@param nStart The start of the token in the string lpNewToken. (inclusive) + *@param nEnd The end of the token in the string lpToken. (inclusive) + *@param lpOldOrig The pointer to the position in the orginal command + * line where this new token should point. + *@param lpNewToken The string containing the new token. May contain more + * than just one token. + *@param nIndex The position to insert the token to. 
+	 *@author Mike Buland
+	 */
+	void insertToken( int nStart, int nEnd, char *lpOldOrig, const char *lpNewToken, int nIndex );
+
+private:
+	char *lpTokenString;
+	LinkedList lToken;
+
+	typedef struct Token
+	{
+		char *lpOrig; // This is just a pointer back to lpTokenString
+		char *lpToken; // This is really a whole token
+	} Token;
+};
+
+#endif
diff --git a/src/xmldocument.cpp b/src/xmldocument.cpp
new file mode 100644
index 0000000..234ff81
--- /dev/null
+++ b/src/xmldocument.cpp
@@ -0,0 +1,142 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "xmlwriter.h"
+
+XmlDocument::XmlDocument( XmlNode *pRoot )
+{
+	this->pRoot = pRoot;
+	pCurrent = NULL;
+	bCompleted = (pRoot!=NULL);
+}
+
+XmlDocument::~XmlDocument()
+{
+	if( pRoot )
+	{
+		delete pRoot;
+	}
+}
+
+void XmlDocument::addNode( const char *sName, const char *sContent, bool bClose )
+{
+	if( pRoot == NULL )
+	{
+		// This is the first node, so ignore position and just insert it.
+		pCurrent = pRoot = new XmlNode( sName, NULL, sContent );
+	}
+	else if( pCurrent != NULL ) // root already closed: there is no current node to add a child to
+	{
+		pCurrent = pCurrent->addChild( sName, sContent );
+	}
+
+	if( bClose )
+	{
+		closeNode();
+	}
+}
+
+void XmlDocument::setName( const char *sName )
+{
+	if( pCurrent ) pCurrent->setName( sName ); // same guard as addProperty() and setContent()
+}
+
+bool XmlDocument::isCompleted()
+{
+	return bCompleted;
+}
+
+XmlNode *XmlDocument::getRoot()
+{
+	return pRoot;
+}
+
+XmlNode *XmlDocument::getCurrent()
+{
+	return pCurrent;
+}
+
+void XmlDocument::closeNode()
+{
+	if( pCurrent != NULL )
+	{
+		pCurrent = pCurrent->getParent();
+
+		if( pCurrent == NULL )
+		{
+			bCompleted = true;
+		}
+	}
+}
+
+void XmlDocument::addProperty( const char *sName, const char *sValue )
+{
+	if( pCurrent )
+	{
+		pCurrent->addProperty( sName, sValue );
+	}
+}
+
+void XmlDocument::addProperty( const char *sName, const unsigned char nValue )
+{
+	char buf[12];
+	snprintf( buf, sizeof(buf), "%hhu", nValue ); // unsigned conversion: %hhi printed values above 127 as negative
+	addProperty( sName, buf );
+}
+
+void XmlDocument::addProperty( const char *sName, const char nValue )
+{
+	char buf[12];
+	snprintf( buf, sizeof(buf), "%hhi", nValue );
+	addProperty( sName, buf );
+}
+
+void XmlDocument::addProperty( const char *sName, const unsigned short nValue )
+{
+	char buf[12];
+	snprintf( buf, sizeof(buf), "%hu", nValue ); // unsigned conversion: %hi printed values above 32767 as negative
+	addProperty( sName, buf );
+}
+
+void XmlDocument::addProperty( const char *sName, const short nValue )
+{
+	char buf[12];
+	snprintf( buf, sizeof(buf), "%hi", nValue );
+	addProperty( sName, buf );
+}
+
+void XmlDocument::addProperty( const char *sName, const int nValue )
+{
+	char buf[12];
+	snprintf( buf, sizeof(buf), "%i", nValue ); // argument is an int, so no 'l' length modifier
+	addProperty( sName, buf );
+}
+
+void XmlDocument::addProperty( const char *sName, const unsigned long nValue )
+{
+	char buf[24]; // a 64-bit unsigned long needs up to 20 digits plus the terminator
+	snprintf( buf, sizeof(buf), "%lu", nValue );
+	addProperty( sName, buf );
+}
+
+void XmlDocument::addProperty( const char *sName, const long nValue )
+{
+	char buf[24]; // a 64-bit long needs a sign, up to 19 digits and the terminator
+	snprintf( buf, sizeof(buf), "%li", nValue );
+	addProperty( sName, buf );
+}
+
+void XmlDocument::addProperty( const char *sName, const double dValue )
+{
+	char buf[320]; // "%f" of the largest double: sign, 309 digits, '.', 6 decimals, terminator
+	snprintf( buf, sizeof(buf), "%f", dValue );
+	addProperty( sName, buf );
+}
+
+void XmlDocument::setContent( const char *sContent )
+{
+	if( pCurrent )
+	{
+		pCurrent->setContent( sContent );
+	}
+}
+
diff --git a/src/xmldocument.h b/src/xmldocument.h
new file mode 100644
index 0000000..f9a8606
--- /dev/null
+++ b/src/xmldocument.h
@@ -0,0 +1,163 @@
+#ifndef XMLDOCUMENT
+#define XMLDOCUMENT
+
+#include "xmlnode.h"
+
+/**
+ * Keeps track of an easily managed set of XmlNode information. Allows simple
+ * operations for logical writing to and reading from XML structures. Using
+ * already formed structures is simply done through the XmlNode structures,
+ * and the getRoot function here. Creation is performed through a simple set
+ * of operations that creates the data in a stream type format.
+ *@author Mike Buland
+ */
+class XmlDocument
+{
+public:
+	/**
+	 * Construct either a blank XmlDocument or construct a document around an
+	 * existing XmlNode. 
Be careful, once an XmlNode is passed into a document + * the document takes over ownership and will delete it when the XmlDocument + * is deleted. + *@param pRoot The XmlNode to use as the root of this document, or NULL if + * you want to start a new document. + */ + XmlDocument( XmlNode *pRoot=NULL ); + + /** + * Destroy all contained nodes. + */ + ~XmlDocument(); + + /** + * Add a new node to the document. The new node is appended to the end of + * the current context, i.e. XmlNode, and the new node, provided it isn't + * close as part of this operation, will become the current context. + *@param sName The name of the new node to add. + *@param sContent A content string to be placed inside of the new node. + *@param bClose Set this to true to close the node immediately after adding + * the node and setting the content and name. If this is set to true the + * node is appended, but the context node doesn't change. + */ + void addNode( const char *sName=NULL, const char *sContent=NULL, bool bClose=false ); + + /** + * Set the name of the current node context. + *@param sName The new name of the node. + */ + void setName( const char *sName ); + + /** + * Close the current node context. This will move the current context to + * the parent node of the former current node. If the current node was the + * root then the "completed" flag is set and no more operations are allowed. + */ + void closeNode(); + + /** + * Change the content of the current node at the current position between + * nodes. + *@param sContent The new content of the current node. + */ + void setContent( const char *sContent ); + + /** + * Add a named property to the current context node. + *@param sName The name of the property to add. + *@param sValue The string value of the property. + */ + void addProperty( const char *sName, const char *sValue ); + + /** + * Add a named property to the current context node, converting the + * numerical parameter to text using standrd printf style conversion. 
+ *@param sName The name of the property to add. + *@param nValue The numerical value to add. + */ + void addProperty( const char *sName, const unsigned char nValue ); + + /** + * Add a named property to the current context node, converting the + * numerical parameter to text using standrd printf style conversion. + *@param sName The name of the property to add. + *@param nValue The numerical value to add. + */ + void addProperty( const char *sName, const char nValue ); + + /** + * Add a named property to the current context node, converting the + * numerical parameter to text using standrd printf style conversion. + *@param sName The name of the property to add. + *@param nValue The numerical value to add. + */ + void addProperty( const char *sName, const unsigned short nValue ); + + /** + * Add a named property to the current context node, converting the + * numerical parameter to text using standrd printf style conversion. + *@param sName The name of the property to add. + *@param nValue The numerical value to add. + */ + void addProperty( const char *sName, const short nValue ); + + /** + * Add a named property to the current context node, converting the + * numerical parameter to text using standrd printf style conversion. + *@param sName The name of the property to add. + *@param nValue The numerical value to add. + */ + void addProperty( const char *sName, const unsigned long nValue ); + + /** + * Add a named property to the current context node, converting the + * numerical parameter to text using standrd printf style conversion. + *@param sName The name of the property to add. + *@param nValue The numerical value to add. + */ + void addProperty( const char *sName, const long nValue ); + + /** + * Add a named property to the current context node, converting the + * numerical parameter to text using standrd printf style conversion. + *@param sName The name of the property to add. + *@param nValue The numerical value to add. 
+ */ + void addProperty( const char *sName, const int nValue ); + + /** + * Add a named property to the current context node, converting the + * numerical parameter to text using standrd printf style conversion. + *@param sName The name of the property to add. + *@param dValue The numerical value to add. + */ + void addProperty( const char *sName, const double dValue ); + + /** + * The XmlDocuemnt is considered completed if the root node has been closed. + * Once an XmlDocument has been completed, you can no longer perform + * operations on it. + *@return True if completed, false if still in progress. + */ + bool isCompleted(); + + /** + * Get a pointer to the root object of this XmlDocument. + *@returns A pointer to an internally owned XmlNode. Do not delete this + * XmlNode. + */ + XmlNode *getRoot(); + + /** + * Get the current context node, which could be the same as the root node. + *@returns A pointer to an internally owned XmlNode. Do not delete this + * XmlNode. + */ + XmlNode *getCurrent(); + +private: + XmlNode *pRoot; /**< The root node. */ + XmlNode *pCurrent; /**< The current node. */ + bool bCompleted; /**< Is it completed? 
*/
+};
+
+#endif
diff --git a/src/xmlfilereader.cpp b/src/xmlfilereader.cpp
new file mode 100644
index 0000000..216c08a
--- /dev/null
+++ b/src/xmlfilereader.cpp
@@ -0,0 +1,64 @@
+#include "xmlfilereader.h"
+#include <string.h>
+
+XmlFileReader::XmlFileReader( const char *sFile, bool bStrip )
+	: XmlReader( bStrip )
+{
+	fh = fopen( sFile, "rt" );
+
+	if( fh == NULL )
+	{
+		reportError("Couldn't open file.");
+		//nError = 1;
+	}
+	else
+	{
+		char buf[50]; // receives the first line, which must be the XML declaration
+
+		if( fgets( buf, sizeof(buf), fh ) != NULL && // nothing read: buf is not valid to compare
+			!strcmp( buf, "<?xml version=\"1.0\"?>\n" ) )
+		{
+			buildDoc();
+		}
+	}
+}
+
+XmlFileReader::~XmlFileReader()
+{
+	if( fh ) fclose( fh ); // getChar() only closes the file once a short read is seen
+}
+
+char XmlFileReader::getChar( int nIndex )
+{
+	// Make sure we always have a little data left in the buffer
+	if( fbDataIn.getLength() <= nIndex+1 && fh )
+	{
+		int nBytes = fbDataIn.getCapacity()-1;
+		char *buf = new char[nBytes];
+		int nRead = fread( buf, 1, nBytes, fh );
+		fbDataIn.appendData( buf, nRead );
+		delete[] buf;
+
+		if( nRead < nBytes )
+		{
+			fclose( fh );
+			fh = NULL;
+		}
+	}
+	if( fbDataIn.getLength() >= nIndex+1 )
+	{
+		return fbDataIn.getData()[nIndex];
+	}
+	else
+	{
+		return '\0';
+	}
+}
+
+void XmlFileReader::usedChar()
+{
+	if( fbDataIn.getLength() > 0 )
+	{
+		fbDataIn.usedData( 1 );
+	}
+}
diff --git a/src/xmlfilereader.h b/src/xmlfilereader.h
new file mode 100644
index 0000000..3e996e6
--- /dev/null
+++ b/src/xmlfilereader.h
@@ -0,0 +1,47 @@
+#ifndef XMLFILEREADER
+#define XMLFILEREADER
+
+#include <stdio.h>
+#include "xmlreader.h"
+#include "flexbuf.h"
+
+/**
+ * Takes care of reading in xml formatted data from a file. This could/should
+ * be made more arbitrary in the future so that we can read the data from any
+ * source. This is actually made quite simple already since all data read in
+ * is handled by one single helper function and then placed into a FlexBuf for
+ * easy access by the other functions. The FlexBuf also allows for block
+ * reading from disk, which improves speed by a noticeable amount. 
+ * <br>
+ * There are also some extra features implemented that allow you to break the
+ * standard XML reader specs and eliminate leading and trailing whitespace in
+ * all read content. This is useful in situations where you allow additional
+ * whitespace in the files to make them easily human readable. The returned
+ * content will be NULL in situations where all content between nodes was
+ * stripped.
+ *@author Mike Buland
+ */
+class XmlFileReader : public XmlReader
+{
+public:
+	/**
+	 * Construct an XmlReader around an xml file on your file system.
+	 *@param sFile The file to read.
+	 *@param bStrip Set to true to strip out leading and trailing whitespace in
+	 * node contents.
+	 */
+	XmlFileReader( const char *sFile, bool bStrip=false );
+
+	/**
+	 * Destroy the reader and cleanup.
+	 */
+	~XmlFileReader();
+
+private:
+	char getChar( int nIndex = 0 );
+	void usedChar();
+	FILE *fh; /**< The file handle. */
+	FlexBuf fbDataIn; /**< The input buffer. */
+};
+
+#endif
diff --git a/src/xmlfilewriter.cpp b/src/xmlfilewriter.cpp
new file mode 100644
index 0000000..b62fb11
--- /dev/null
+++ b/src/xmlfilewriter.cpp
@@ -0,0 +1,22 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "xmlfilewriter.h"
+
+XmlFileWriter::XmlFileWriter( const char *sFileName, const char *sIndent, XmlNode *pRoot ) :
+	XmlWriter( sIndent, pRoot )
+{
+	this->sFileName = sFileName;
+	fh = fopen( sFileName, "wt"); // NULL when the file cannot be created
+	if( fh ) fprintf( fh, "<?xml version=\"1.0\"?>\n");
+}
+
+XmlFileWriter::~XmlFileWriter()
+{
+	if( fh ) fclose( fh ); // fclose() must not be handed a NULL stream
+}
+
+void XmlFileWriter::writeString( const char *sString )
+{
+	if( fh && sString ) fputs( sString, fh ); // write verbatim: document text must never act as a printf format
+}
+
diff --git a/src/xmlfilewriter.h b/src/xmlfilewriter.h
new file mode 100644
index 0000000..97b3e00
--- /dev/null
+++ b/src/xmlfilewriter.h
@@ -0,0 +1,44 @@
+#ifndef XML_FILE_WRITER
+#define XML_FILE_WRITER
+
+#include "xmlnode.h"
+#include "xmlwriter.h"
+
+/**
+ * Implements xml writing in the XML standard format. 
Also allows you to + * break that format and auto-indent your exported xml data for ease of + * reading. The auto-indenting will only be applied to sections that + * have no content of their own already. This means that except for + * whitespace all of your data will be preserved perfectly. + * You can create an XmlWriter object around a file, or access the static + * write function directly and just hand it a filename and a root XmlNode. + * When using an XmlWriter object the interface is identicle to that of + * the XmlDocument class, so reference that class for API info. However + * when the initial (or root) node is closed, and the document is finished + * the file will be created and written to automatically. The user can + * check to see if this is actually true by calling the isFinished + * function in the XmlDocument class. + *@author Mike Buland + */ +class XmlFileWriter : public XmlWriter +{ +public: + /** + * Construct a file writer around a given file. + *@param sFileName The file to create or overwrite and write XML into. + *@param sIndent The indent text to use, if any. + */ + XmlFileWriter( const char *sFileName, const char *sIndent=NULL, XmlNode *pRoot=NULL ); + + /** + * Destroy the writer. + */ + ~XmlFileWriter(); + +private: + void writeString( const char *sString ); + std::string sFileName; /**< The filename to write to. */ + FILE *fh; /**< The file handle to the open file. 
*/ +}; + +#endif diff --git a/src/xmlnode.cpp b/src/xmlnode.cpp new file mode 100644 index 0000000..e5c77e2 --- /dev/null +++ b/src/xmlnode.cpp @@ -0,0 +1,454 @@ +#include "xmlnode.h" +#include "hashfunctionstring.h" + +XmlNode::XmlNode( const char *sName, XmlNode *pParent, const char *sContent ) : + hProperties( new HashFunctionString(), 53, false ), + hChildren( new HashFunctionString(), 53, true ) +{ + this->pParent = pParent; + if( sName != NULL ) + { + setName( sName ); + } + if( sContent != NULL ) + { + this->sPreContent = new std::string( sContent ); + } + else + { + this->sPreContent = NULL; + } + nCurContent = 0; +} + +XmlNode::~XmlNode() +{ + for( int j = 0; j < lChildren.getSize(); j++ ) + { + delete (XmlNode *)lChildren[j]; + } + for( int j = 0; j < lPropNames.getSize(); j++ ) + { + delete (std::string *)lPropNames[j]; + } + for( int j = 0; j < lPropValues.getSize(); j++ ) + { + delete (std::string *)lPropValues[j]; + } + for( int j = 0; j < lPostContent.getSize(); j++ ) + { + if( lPostContent[j] != NULL ) + { + delete (std::string *)lPostContent[j]; + } + } + if( sPreContent ) + { + delete sPreContent; + } +} + +void XmlNode::setName( const char *sName ) +{ + if( pParent ) + { + if( this->sName.size() == 0 ) + { + // We're not in the hash yet, so add us + this->sName = sName; + pParent->hChildren.insert( this->sName.c_str(), this ); + } + else + { + // Slightly more tricky, delete us, then add us... + pParent->hChildren.del( this->sName.c_str() ); + this->sName = sName; + pParent->hChildren.insert( this->sName.c_str(), this ); + } + } + else + { + // If we have no parent, then just set the name string, we don't need + // to worry about hashing. 
+ this->sName = sName; + } +} + +void XmlNode::setContent( const char *sContent, int nIndex ) +{ + if( nIndex == -1 ) + { + nIndex = nCurContent; + } + if( nIndex == 0 ) + { + if( this->sPreContent ) + { + delete this->sPreContent; + } + + this->sPreContent = new std::string( sContent ); + } + else + { + nIndex--; + if( lPostContent[nIndex] ) + { + delete (std::string *)lPostContent[nIndex]; + } + + lPostContent.setAt( nIndex, new std::string( sContent ) ); + } +} + +const char *XmlNode::getContent( int nIndex ) +{ + if( nIndex == 0 ) + { + if( sPreContent ) + { + return sPreContent->c_str(); + } + } + else + { + nIndex--; + if( lPostContent[nIndex] ) + { + return ((std::string *)lPostContent[nIndex])->c_str(); + } + } + + return NULL; +} + +XmlNode *XmlNode::addChild( const char *sName, const char *sContent ) +{ + return addChild( new XmlNode( sName, this, sContent ) ); +} + +XmlNode *XmlNode::addChild( XmlNode *pNode ) +{ + lChildren.append( pNode ); + lPostContent.append( NULL ); + nCurContent++; + pNode->pParent = this; + + return pNode; +} + +XmlNode *XmlNode::getParent() +{ + return pParent; +} + +void XmlNode::addProperty( const char *sName, const char *sValue ) +{ + std::string *pName = new std::string( sName ); + std::string *pValue = new std::string( sValue ); + + hProperties.insert( pName->c_str(), pValue->c_str() ); + lPropNames.append( pName ); + lPropValues.append( pValue ); +} + +int XmlNode::getNumProperties() +{ + return lPropNames.getSize(); +} + +const char *XmlNode::getPropertyName( int nIndex ) +{ + std::string *tmp = ((std::string *)lPropNames[nIndex]); + if( tmp == NULL ) + return NULL; + return tmp->c_str(); +} + +const char *XmlNode::getProperty( int nIndex ) +{ + std::string *tmp = ((std::string *)lPropValues[nIndex]); + if( tmp == NULL ) + return NULL; + return tmp->c_str(); +} + +const char *XmlNode::getProperty( const char *sName ) +{ + const char *tmp = (const char *)hProperties[sName]; + if( tmp == NULL ) + return NULL; + return tmp; 
+}
+
+/**
+ * Delete the property at the given index from the name list, the value list,
+ * and the property hash.
+ *@param nIndex The zero-based index of the property to delete.
+ *@returns True if the property existed and was deleted, false if nIndex was
+ * out of range.
+ */
+bool XmlNode::deleteProperty( int nIndex )
+{
+    // Reject bad indexes up front; everything below dereferences the list
+    // entries without checking them.
+    if( nIndex < 0 || nIndex >= lPropNames.getSize() )
+        return false;
+
+    // addProperty() handed the hash pointers into these strings, so remove
+    // the hash entry before the strings themselves are freed.
+    hProperties.del( ((std::string *)lPropNames[nIndex])->c_str() );
+
+    delete (std::string *)lPropNames[nIndex];
+    delete (std::string *)lPropValues[nIndex];
+
+    lPropNames.deleteAt( nIndex );
+    lPropValues.deleteAt( nIndex );
+
+    // This function is declared bool; flowing off the end without returning
+    // a value is undefined behaviour.
+    return true;
+}
+
+bool XmlNode::hasChildren()
+{
+    return lChildren.getSize()>0;
+}
+
+int XmlNode::getNumChildren()
+{
+    return lChildren.getSize();
+}
+
+XmlNode *XmlNode::getChild( int nIndex )
+{
+    return (XmlNode *)lChildren[nIndex];
+}
+
+XmlNode *XmlNode::getChild( const char *sName, int nSkip )
+{
+    return (XmlNode *)hChildren.get( sName, nSkip );
+}
+
+const char *XmlNode::getName()
+{
+    return sName.c_str();
+}
+
+bool XmlNode::deleteNode( int nIndex, const char *sReplacementText )
+{
+    XmlNode *xRet = detatchNode( nIndex, sReplacementText );
+
+    if( xRet == NULL )
+    {
+        return false;
+    }
+    else
+    {
+        delete xRet;
+        return true;
+    }
+}
+
+XmlNode *XmlNode::detatchNode( int nIndex, const char *sReplacementText )
+{
+    if( nIndex < 0 || nIndex >= lChildren.getSize() )
+        return NULL;
+
+    // The real trick when deleting a node isn't actually deleting it, it's
+    // reforming the content around the node that's now missing...hmmm...
+ + if( nIndex == 0 ) + { + // If the index is zero we have to deal with the pre-content + if( sReplacementText ) + { + if( sPreContent == NULL ) + { + sPreContent = new std::string( sReplacementText ); + } + else + { + *sPreContent += sReplacementText; + } + } + if( lPostContent.getSize() > 0 ) + { + if( lPostContent[0] != NULL ) + { + if( sPreContent == NULL ) + { + sPreContent = new std::string( + ((std::string *)lPostContent[0])->c_str() + ); + } + else + { + *sPreContent += + ((std::string *)lPostContent[0])->c_str(); + } + } + delete (std::string *)lPostContent[0]; + lPostContent.deleteAt( 0 ); + } + } + else + { + int nCont = nIndex-1; + // If it's above zero we deal with the post-content only + if( sReplacementText ) + { + if( lPostContent[nCont] == NULL ) + { + lPostContent.setAt( nCont, new std::string( sReplacementText ) ); + } + else + { + *((std::string *)lPostContent[nCont]) += sReplacementText; + } + } + if( lPostContent.getSize() > nIndex ) + { + if( lPostContent[nIndex] != NULL ) + { + if( lPostContent[nCont] == NULL ) + { + lPostContent.setAt( nCont, new std::string( + ((std::string *)lPostContent[nIndex])->c_str() + ) ); + } + else + { + *((std::string *)lPostContent[nCont]) += + ((std::string *)lPostContent[nIndex])->c_str(); + } + } + delete (std::string *)lPostContent[nIndex]; + lPostContent.deleteAt( nIndex ); + } + } + + XmlNode *xRet = (XmlNode *)lChildren[nIndex]; + hChildren.del( ((XmlNode *)lChildren[nIndex])->getName() ); + lChildren.deleteAt( nIndex ); + + return xRet; +} + +bool XmlNode::replaceNode( int nIndex, XmlNode *pNewNode ) +{ + if( nIndex < 0 || nIndex >= lChildren.getSize() ) + return false; + + delete (XmlNode *)lChildren[nIndex]; + lChildren.setAt( nIndex, pNewNode ); + pNewNode->pParent = this; + + return true; +} + +XmlNode *XmlNode::getCopy() +{ + XmlNode *pNew = new XmlNode(); + + pNew->sName = sName; + if( sPreContent ) + { + pNew->sPreContent = new std::string( sPreContent->c_str() ); + } + else + { + 
pNew->sPreContent = NULL;
+    }
+    pNew->nCurContent = 0;
+
+    // Deep-copy the content strings that follow each child.
+    int nSize = lPostContent.getSize();
+    pNew->lPostContent.setSize( nSize );
+    for( int j = 0; j < nSize; j++ )
+    {
+        if( lPostContent[j] )
+        {
+            pNew->lPostContent.setAt(
+                j, new std::string(
+                    ((std::string *)lPostContent[j])->c_str()
+                    )
+                );
+        }
+        else
+        {
+            pNew->lPostContent.setAt( j, NULL );
+        }
+    }
+
+    // Deep-copy the children. Each copy belongs to pNew: it must point back
+    // at pNew as its parent and be registered in pNew's name hash, not in the
+    // hash of the node that is being copied.
+    nSize = lChildren.getSize();
+    pNew->lChildren.setSize( nSize );
+    for( int j = 0; j < nSize; j++ )
+    {
+        XmlNode *pChild = ((XmlNode *)lChildren[j])->getCopy();
+        pNew->lChildren.setAt( j, pChild );
+        pChild->pParent = pNew;
+        pNew->hChildren.insert( pChild->getName(), pChild );
+        // Mirror addChild(), which advances nCurContent once per child.
+        pNew->nCurContent++;
+    }
+
+    // Deep-copy the properties into both the ordered lists and the hash.
+    nSize = lPropNames.getSize();
+    pNew->lPropNames.setSize( nSize );
+    pNew->lPropValues.setSize( nSize );
+    for( int j = 0; j < nSize; j++ )
+    {
+        std::string *pProp = new std::string( ((std::string *)lPropNames[j])->c_str() );
+        std::string *pVal = new std::string( ((std::string *)lPropValues[j])->c_str() );
+        pNew->lPropNames.setAt( j, pProp );
+        pNew->lPropValues.setAt( j, pVal );
+        pNew->hProperties.insert( pProp->c_str(), pVal->c_str() );
+    }
+
+    return pNew;
+}
+
+bool XmlNode::deleteNodeKeepChildren( int nIndex )
+{
+    // This is a tricky one...we need to do some patching to keep things all
+    // even...
+    XmlNode *xRet = (XmlNode *)lChildren[nIndex];
+
+    if( xRet == NULL )
+    {
+        return false;
+    }
+    else
+    {
+        // Splice the removed node's leading content onto the content that
+        // precedes it in this node.
+        if( getContent( nIndex ) )
+        {
+            std::string sBuf( getContent( nIndex ) );
+            // The removed node may have no leading content; appending a NULL
+            // char pointer to a std::string is undefined behaviour.
+            if( xRet->getContent( 0 ) )
+                sBuf += xRet->getContent( 0 );
+            setContent( sBuf.c_str(), nIndex );
+        }
+        else if( xRet->getContent( 0 ) )
+        {
+            // setContent() builds a std::string from its argument, so it must
+            // never be handed NULL.
+            setContent( xRet->getContent( 0 ), nIndex );
+        }
+
+        // Move copies of the removed node's children, and the content that
+        // follows each of them, up into this node.
+        int nSize = xRet->lChildren.getSize();
+        for( int j = 0; j < nSize; j++ )
+        {
+            XmlNode *pCopy = ((XmlNode *)xRet->lChildren[j])->getCopy();
+            pCopy->pParent = this;
+            lChildren.insertBefore( pCopy, nIndex+j );
+
+            if( xRet->lPostContent[j] )
+            {
+                lPostContent.insertBefore(
+                    new std::string( ((std::string *)xRet->lPostContent[j])->c_str() ),
+                    nIndex+j
+                    );
+            }
+            else
+            {
+                lPostContent.insertBefore( NULL, nIndex+j );
+            }
+        }
+
+        if( getContent( nIndex+nSize ) )
+        {
+            //SString sBuf( getContent( nIndex+nSize ) );
+            //sBuf.catfrom( xRet->getContent( nSize ) );
+            //setContent( sBuf, nIndex+nSize );
+        }
+        else if( xRet->getContent( nSize ) )
+        {
+            // Same NULL guard as above: only set content that exists.
+            setContent( xRet->getContent( nSize ), nIndex+nSize );
+        }
+
+        deleteNode( nIndex+nSize );
+        return true;
+    }
+
+}
+
+/**
+ * Replace a given node with the children and content of another node.
+ * This operation is not implemented yet, so it always reports failure.
+ *@param nIndex The index of the node to replace (currently unused).
+ *@param pNewNode The node whose children would be used (currently unused).
+ *@returns Always false.
+ */
+bool XmlNode::replaceNodeWithChildren( int nIndex, XmlNode *pNewNode )
+{
+    // Report failure explicitly: flowing off the end of a bool function
+    // without returning a value is undefined behaviour.
+    (void)nIndex;
+    (void)pNewNode;
+    return false;
+}
+
diff --git a/src/xmlnode.h b/src/xmlnode.h
new file mode 100644
index 0000000..2b01226
--- /dev/null
+++ b/src/xmlnode.h
@@ -0,0 +1,236 @@
+#ifndef XMLNODE
+#define XMLNODE
+
+#include <iostream>
+#include "linkedlist.h"
+#include "hashtable.h"
+
+/**
+ * Maintains all data pertinent to an XML node, including sub-nodes and content.
+ * All child nodes can be accessed through index and through name via a hash
+ * table. This makes it very easy to gain simple and fast access to all of
+ * your data. For most applications, the memory footprint is also rather
+ * small. While XmlNode objects can be used directly to create XML structures
+ * it is highly recommended that all operations be performed through the
+ * XmlDocument class.
+ *@author Mike Buland
+ */
+class XmlNode
+{
+public:
+    /**
+     * Construct a new XmlNode.
+ *@param sName The name of the node. + *@param pParent The parent node. + *@param sContent The initial content string. + */ + XmlNode( + const char *sName=NULL, + XmlNode *pParent = NULL, + const char *sContent=NULL + ); + + /** + * Delete the node and cleanup all memory. + */ + ~XmlNode(); + + /** + * Change the name of the node. + *@param sName The new name of the node. + */ + void setName( const char *sName ); + + /** + * Construct a new node and add it as a child to this node, also return a + * pointer to the newly constructed node. + *@param sName The name of the new node. + *@param sContent The initial content of the new node. + *@returns A pointer to the newly created child node. + */ + XmlNode *addChild( const char *sName, const char *sContent=NULL ); + + /** + * Add an already created XmlNode as a child to this node. The new child + * XmlNode's parent will be changed appropriately and the parent XmlNode + * will take ownership of the child. + *@param pChild The child XmlNode to add to this XmlNode. + *@returns A pointer to the child node that was just added. + */ + XmlNode *addChild( XmlNode *pChild ); + + /** + * Add a new property to the XmlNode. Properties are name/value pairs. + *@param sName The name of the property. Specifying a name that's already + * in use will overwrite that property. + *@param sValue The textual value of the property. + */ + void addProperty( const char *sName, const char *sValue ); + + /** + * Get a pointer to the parent node, if any. + *@returns A pointer to the node's parent, or NULL if there isn't one. + */ + XmlNode *getParent(); + + /** + * Tells you if this node has children. + *@returns True if this node has at least one child, false otherwise. + */ + bool hasChildren(); + + /** + * Tells you how many children this node has. + *@returns The number of children this node has. + */ + int getNumChildren(); + + /** + * Get a child node at a specific index. + *@param nIndex The zero-based index of the child to retreive. 
+ *@returns A pointer to the child, or NULL if you requested an invalid + * index. + */ + XmlNode *getChild( int nIndex ); + + /** + * Get a child with the specified name, and possibly skip value. For an + * explination of skip values see the HashTable. + *@param sName The name of the child to find. + *@param nSkip The number of nodes with that name to skip. + *@returns A pointer to the child, or NULL if no child with that name was + * found. + */ + XmlNode *getChild( const char *sName, int nSkip=0 ); + + /** + * Get a pointer to the name of this node. Do not change this, use setName + * instead. + *@returns A pointer to the name of this node. + */ + const char *getName(); + + /** + * Set the content of this node, optionally at a specific index. Using the + * default of -1 will set the content after the last added node. + *@param sContent The content string to use. + *@param nIndex The index of the content. + */ + void setContent( const char *sContent, int nIndex=-1 ); + + /** + * Get the content string at a given index, or zero for initial content. + *@param nIndex The index of the content. + *@returns A pointer to the content at that location. + */ + const char *getContent( int nIndex = 0 ); + + /** + * Get the number of properties in this node. + *@returns The number of properties in this node. + */ + int getNumProperties(); + + /** + * Get a property's name by index. + *@param nIndex The index of the property to examine. + *@returns A pointer to the name of the property specified, or NULL if none + * found. + */ + const char *getPropertyName( int nIndex ); + + /** + * Get a proprty's value by index. + *@param nIndex The index of the property to examine. + *@returns A pointer to the value of the property specified, or NULL if none + * found. + */ + const char *getProperty( int nIndex ); + + /** + * Get a propery's value by name. + *@param sName The name of the property to examine. 
+ *@returns A pointer to the value of the property specified, or NULL if none + * found. + */ + const char *getProperty( const char *sName ); + + /** + * Delete a property by index. + *@param nIndex The index of the property to delete. + *@returns True if the property was found and deleted, false if it wasn't + * found. + */ + bool deleteProperty( int nIndex ); + + /** + * Delete a child node, possibly replacing it with some text. This actually + * fixes all content strings around the newly deleted child node. + *@param nIndex The index of the node to delete. + *@param sReplacementText The optional text to replace the node with. + *@returns True of the node was found, and deleted, false if it wasn't + * found. + */ + bool deleteNode( int nIndex, const char *sReplacementText = NULL ); + + /** + * Delete a given node, but move all of it's children and content up to + * replace the deleted node. All of the content of the child node is + * spliced seamlessly into place with the parent node's content. + *@param nIndex The node to delete. + *@returns True if the node was found and deleted, false if it wasn't. + */ + bool deleteNodeKeepChildren( int nIndex ); + + /** + * Detatch a given child node from this node. This effectively works just + * like a deleteNode, except that instead of deleting the node it is removed + * and returned, and all ownership is given up. + *@param nIndex The index of the node to detatch. + *@param sReplacementText The optional text to replace the detatched node + * with. + *@returns A pointer to the newly detatched node, which then passes + * ownership to the caller. + */ + XmlNode *detatchNode( int nIndex, const char *sReplacementText = NULL ); + + /** + * Replace a given node with a different node that is not currently owned by + * this XmlNode or any ancestor. + *@param nIndex The index of the node to replace. + *@param pNewNode The new node to replace the old node with. 
+ *@returns True if the node was found and replaced, false if it wasn't. + */ + bool replaceNode( int nIndex, XmlNode *pNewNode ); + + /** + * Replace a given node with the children and content of a given node. + *@param nIndex The index of the node to replace. + *@param pNewNode The node that contains the children and content that will + * replace the node specified by nIndex. + *@returns True if the node was found and replaced, false if it wasn't. + */ + bool replaceNodeWithChildren( int nIndex, XmlNode *pNewNode ); + + /** + * Get a copy of this node and all children. getCopy is recursive, so + * beware copying large trees of xml. + *@returns A newly created copy of this node and all of it's children. + */ + XmlNode *getCopy(); + +private: + std::string sName; /**< The name of the node. */ + std::string *sPreContent; /**< The content that goes before any node. */ + LinkedList lChildren; /**< The children. */ + LinkedList lPostContent; /**< The content that comes after children. */ + HashTable hProperties; /**< Property hashtable. */ + HashTable hChildren; /**< Children hashtable. */ + LinkedList lPropNames; /**< List of property names. */ + LinkedList lPropValues; /**< List of property values. */ + XmlNode *pParent; /**< A pointer to the parent of this node. */ + int nCurContent; /**< The current content we're on, for using the -1 on + setContent. 
*/ +}; + +#endif diff --git a/src/xmlreader.cpp b/src/xmlreader.cpp new file mode 100644 index 0000000..bb24157 --- /dev/null +++ b/src/xmlreader.cpp @@ -0,0 +1,412 @@ +#include "xmlreader.h" +#include <string.h> + +XmlReader::XmlReader( bool bStrip ) +{ + nError = 0; + this->bStrip = bStrip; +} + +XmlReader::~XmlReader() +{ +} + +#define gcall( x ) if( x == false ) return false; + +bool XmlReader::isws( char chr ) +{ + return ( chr == ' ' || chr == '\t' || chr == '\n' || chr == '\r' ); +} + +bool XmlReader::ws() +{ + while( true ) + { + char chr = getChar(); + if( isws( chr ) ) + { + usedChar(); + } + else + { + return true; + } + } + return true; +} + +bool XmlReader::buildDoc() +{ + // take care of initial whitespace + gcall( ws() ); + gcall( node() ); + + return true; +} + +bool XmlReader::node() +{ + gcall( startNode() ) + + // At this point, we are closing the startNode + char chr = getChar(); + if( chr == '>' ) + { + usedChar(); + + // Now we process the guts of the node. + gcall( content() ); + } + else if( chr == '/' ) + { + // This is the tricky one, one more validation, then we close the node. + usedChar(); + if( getChar() == '>' ) + { + closeNode(); + usedChar(); + } + else + { + reportError("Close node in singleNode malformed!"); + return false; + } + } + else + { + reportError("Close node expected, but not found."); + return false; + } + + return true; +} + +bool XmlReader::startNode() +{ + if( getChar() == '<' ) + { + usedChar(); + + if( getChar() == '/' ) + { + // Heh, it's actually a close node, go figure + FlexBuf fbName; + usedChar(); + gcall( ws() ); + + while( true ) + { + char chr = getChar(); + if( isws( chr ) || chr == '>' ) + { + // Here we actually compare the name we got to the name + // we already set, they have to match exactly. 
+                        if( !strcasecmp( getCurrent()->getName(), fbName.getData() ) )
+                        {
+                            closeNode();
+                            break;
+                        }
+                        else
+                        {
+                            reportError("Got a mismatched node close tag.");
+                            return false;
+                        }
+                    }
+                    else
+                    {
+                        // getChar() yields 0 at end of stream and usedChar()
+                        // stops advancing there, so without this check the
+                        // loop would append forever.
+                        if( chr == '\0' )
+                        {
+                            reportError("Unexpected end of stream in node close tag.");
+                            return false;
+                        }
+                        fbName.appendData( chr );
+                        usedChar();
+                    }
+                }
+
+                gcall( ws() );
+                if( getChar() == '>' )
+                {
+                    // Everything is cool.
+                    usedChar();
+                }
+                else
+                {
+                    reportError("Got extra junk data instead of node close tag.");
+                    return false;
+                }
+            }
+            else
+            {
+                // We're good, format is consistent
+                addNode();
+
+                // Skip extra whitespace
+                gcall( ws() );
+                gcall( name() );
+                gcall( ws() );
+                gcall( paramlist() );
+                gcall( ws() );
+            }
+        }
+        else
+        {
+            reportError("Expected to find node opening char, '<'.\n");
+            return false;
+        }
+
+    return true;
+}
+
+/**
+ * Automaton function: name. Reads characters up to the next whitespace, '>'
+ * or '/' and sets them as the name of the current node.
+ *@returns True if a name was read, false if the stream ended first.
+ */
+bool XmlReader::name()
+{
+    FlexBuf fbName;
+
+    while( true )
+    {
+        char chr = getChar();
+        if( isws( chr ) || chr == '>' || chr == '/' )
+        {
+            setName( fbName.getData() );
+            return true;
+        }
+        else if( chr == '\0' )
+        {
+            // End of stream: bail out instead of appending 0 bytes forever.
+            reportError("Unexpected end of stream in node name.");
+            return false;
+        }
+        else
+        {
+            fbName.appendData( chr );
+            usedChar();
+        }
+    }
+
+    return true;
+}
+
+/**
+ * Automaton function: paramlist. Reads parameters until the '/' or '>' that
+ * ends the opening tag is reached.
+ *@returns True if the list was read, false on a bad parameter or if the
+ * stream ended before the tag was closed.
+ */
+bool XmlReader::paramlist()
+{
+    while( true )
+    {
+        char chr = getChar();
+        if( chr == '/' || chr == '>' )
+        {
+            return true;
+        }
+        else if( chr == '\0' )
+        {
+            // End of stream before the tag was closed.
+            reportError("Unexpected end of stream in parameter list.");
+            return false;
+        }
+        else
+        {
+            gcall( param() );
+            gcall( ws() );
+        }
+    }
+
+    return true;
+}
+
+char XmlReader::getEscape()
+{
+    // Right now, we just do # escapes...
+    if( getChar( 1 ) == '#' )
+    {
+        // Consume the leading "&#".
+        usedChar();
+        usedChar();
+
+        // Collect the characters up to ';'. The buffer is bounded and end of
+        // stream is checked, so a long or unterminated escape is reported
+        // instead of writing past the end of buf.
+        char buf[8];
+        int j = 0;
+        while( true )
+        {
+            char chr = getChar();
+            if( chr == ';' )
+                break;
+            if( chr == '\0' || j >= (int)sizeof(buf)-1 )
+            {
+                reportError("Malformed or unterminated numeric escape.");
+                return '\0';
+            }
+            buf[j++] = chr;
+            usedChar();
+        }
+        usedChar();
+        buf[j] = '\0';
+        return (char)atoi( buf );
+    }
+    else
+    {
+        // Not a numeric escape; callers treat '\0' as failure.
+        return '\0';
+    }
+}
+
+/**
+ * Automaton function: param. Reads one name=value pair, with the value
+ * either double-quoted or a single bare word, and adds it as a property.
+ *@returns True if a property was added, false on malformed input or if the
+ * stream ended first.
+ */
+bool XmlReader::param()
+{
+    FlexBuf fbName;
+    FlexBuf fbValue;
+
+    while( true )
+    {
+        char chr = getChar();
+        if( isws( chr ) || chr == '=' )
+        {
+            break;
+        }
+        else if( chr == '\0' )
+        {
+            // getChar() yields 0 at end of stream and usedChar() stops
+            // advancing there, so this loop would otherwise never end.
+            reportError("Unexpected end of stream in parameter name.");
+            return false;
+        }
+        else
+        {
+            fbName.appendData( chr );
+            usedChar();
+        }
+    }
+
+    gcall( ws() );
+
+    if( getChar() == '=' )
+    {
+        usedChar();
+
+        gcall( ws() );
+
+        char chr = getChar();
+        if( chr == '"' )
+        {
+            // Better quoted rhs
+            usedChar();
+
+            while( true )
+            {
+                chr = getChar();
+                if( chr == '"' )
+                {
+                    usedChar();
+                    addProperty( fbName.getData(), fbValue.getData() );
+                    return true;
+                }
+                else if( chr == '\0' )
+                {
+                    reportError("Unexpected end of stream in quoted parameter value.");
+                    return false;
+                }
+                else
+                {
+                    if( chr == '&' )
+                    {
+                        chr = getEscape();
+                        if( chr == '\0' ) return false;
+                        fbValue.appendData( chr );
+                    }
+                    else
+                    {
+                        fbValue.appendData( chr );
+                        usedChar();
+                    }
+                }
+            }
+        }
+        else
+        {
+            // Simple one-word rhs
+            while( true )
+            {
+                chr = getChar();
+                if( isws( chr ) || chr == '/' || chr == '>' )
+                {
+                    addProperty( fbName.getData(), fbValue.getData() );
+                    return true;
+                }
+                else if( chr == '\0' )
+                {
+                    reportError("Unexpected end of stream in parameter value.");
+                    return false;
+                }
+                else
+                {
+                    if( chr == '&' )
+                    {
+                        chr = getEscape();
+                        if( chr == '\0' ) return false;
+                        fbValue.appendData( chr );
+                    }
+                    else
+                    {
+                        fbValue.appendData( chr );
+                        usedChar();
+                    }
+                }
+            }
+        }
+    }
+    else
+    {
+        reportError("Expected an equals to seperate the params.");
+        return false;
+    }
+
+    return true;
+}
+
+/**
+ * Automaton function: content. Reads the text and child nodes of the current
+ * node up to and including its matching close tag.
+ *@returns True once the close tag has been consumed, false on malformed
+ * input or if the stream ended first.
+ */
+bool XmlReader::content()
+{
+    FlexBuf fbContent;
+
+    if( bStrip ) gcall( ws() );
+
+    while( true )
+    {
+        char chr = getChar();
+        if( chr == '<' )
+        {
+            if( getChar(1) == '/' )
+            {
+                // Close tag: flush any pending text first.
+                if( fbContent.getLength() > 0 )
+                {
+                    if( bStrip )
+                    {
+                        // Trim trailing whitespace by terminating early.
+                        int j;
+                        for( j = fbContent.getLength()-1; isws(fbContent.getData()[j]); j-- );
+                        ((char *)fbContent.getData())[j+1] = '\0';
+                    }
+                    setContent( fbContent.getData() );
+                }
+                usedChar();
+                usedChar();
+                gcall( ws() );
+                FlexBuf fbName;
+                while( true )
+                {
+                    chr = getChar();
+                    if( isws( chr ) || chr == '>' )
+                    {
+                        if( !strcasecmp( getCurrent()->getName(), fbName.getData() ) )
+                        {
+                            closeNode();
+                            break;
+                        }
+                        else
+                        {
+                            reportError("Mismatched close tag found.");
+                            return false;
+                        }
+                    }
+                    else if( chr == '\0' )
+                    {
+                        reportError("Unexpected end of stream in close tag.");
+                        return false;
+                    }
+                    else
+                    {
+                        fbName.appendData( chr );
+                        usedChar();
+                    }
+                }
+                gcall( ws() );
+                if( getChar() == '>' )
+                {
+                    usedChar();
+                    return true;
+                }
+                else
+                {
+                    reportError("Malformed close tag.");
+                    return false;
+                }
+            }
+            else
+            {
+                // Child node: flush pending text, then recurse into it.
+                if( fbContent.getLength() > 0 )
+                {
+                    if( bStrip )
+                    {
+                        int j;
+                        for( j = fbContent.getLength()-1; isws(fbContent.getData()[j]); j-- );
+                        ((char *)fbContent.getData())[j+1] = '\0';
+                    }
+                    setContent( fbContent.getData() );
+                    fbContent.clearData();
+                }
+                gcall( node() );
+            }
+
+            if( bStrip ) gcall( ws() );
+        }
+        else if( chr == '\0' )
+        {
+            // End of stream before the close tag: stop instead of appending
+            // 0 bytes to the content buffer forever.
+            reportError("Unexpected end of stream inside node content.");
+            return false;
+        }
+        else
+        {
+            fbContent.appendData( chr );
+            usedChar();
+        }
+    }
+}
+
+/**
+ * Print an error message and record that an error happened, so that
+ * getError() returns non-zero afterwards.
+ *@param sError The error to report.
+ */
+void XmlReader::reportError( const char *sError )
+{
+    nError = 1;
+    printf("XmlReader error: %s\n", sError );
+}
+
+int XmlReader::getError()
+{
+    return nError;
+}
+
diff --git a/src/xmlreader.h b/src/xmlreader.h
new file mode 100644
index 0000000..a8a81f0
--- /dev/null
+++ b/src/xmlreader.h
@@ -0,0 +1,133 @@
+#ifndef XMLREADER
+#define XMLREADER
+
+#include <stdio.h>
+#include "xmldocument.h"
+#include "flexbuf.h"
+
+/**
+ * Takes care of reading in xml formatted data from a file. This could/should
+ * be made more arbitrary in the future so that we can read the data from any
+ * source. This is actually made quite simple already since all data read in
+ * is handled by one single helper function and then placed into a FlexBuf for
+ * easy access by the other functions. The FlexBuf also allows for block
+ * reading from disk, which improves speed by a noticeable amount.
+ * <br>
+ * There are also some extra features implemented that allow you to break the
+ * standard XML reader specs and eliminate leading and trailing whitespace in
+ * all read content.
This is useful in situations where you allow additional + * whitespace in the files to make them easily human readable. The resturned + * content will be NULL in sitautions where all content between nodes was + * stripped. + *@author Mike Buland + */ +class XmlReader : public XmlDocument +{ +public: + /** + * Create a standard XmlReader. The optional parameter bStrip allows you to + * create a reader that will strip out all leading and trailing whitespace + * in content, a-la html. + *@param bStrip Strip out leading and trailing whitespace? + */ + XmlReader( bool bStrip=false ); + + /** + * Destroy this XmlReader. + */ + ~XmlReader(); + + /** + * Get the error code if an error happened. + *@returns The error code (I don't know what they are either) + */ + int getError(); + + /** + * Report an error to something, this is a really strange mechanism and + * should probably just be replaced with the multi-log system. + *@param sError The error to report. + */ + void reportError( const char *sError ); + + /** + * Build a document based on some kind of input. This is called + * automatically by the constructor. + */ + bool buildDoc(); + +private: + /** + * This is called by the low level automoton in order to get the next + * character. This function should return a character at the current + * position plus nIndex, but does not increment the current character. + *@param nIndex The index of the character from the current stream position. + *@returns A single character at the requested position, or 0 for end of + * stream. + */ + virtual char getChar( int nIndex = 0 ) = 0; + + /** + * Called to increment the current stream position by a single character. + */ + virtual void usedChar() = 0; + + /** + * Automoton function: is whitespace. + *@param chr A character + *@returns True if chr is whitespace, false otherwise. + */ + bool isws( char chr ); + + /** + * Automoton function: ws. Skips sections of whitespace. 
+ *@returns True if everything was ok, False for end of stream. + */ + bool ws(); + + /** + * Automoton function: node. Processes an XmlNode + *@returns True if everything was ok, False for end of stream. + */ + bool node(); + + /** + * Automoton function: startNode. Processes the begining of a node. + *@returns True if everything was ok, False for end of stream. + */ + bool startNode(); + + /** + * Automoton function: name. Processes the name of a node. + *@returns True if everything was ok, False for end of stream. + */ + bool name(); + + char getEscape(); + + /** + * Automoton function: paramlist. Processes a list of node params. + *@returns True if everything was ok, False for end of stream. + */ + bool paramlist(); + + /** + * Automoton function: param. Processes a single parameter. + *@returns True if everything was ok, False for end of stream. + */ + bool param(); + + /** + * Automoton function: content. Processes node content. + *@returns True if everything was ok, False for end of stream. + */ + bool content(); + + FlexBuf fbContent; /**< buffer for the current node's content. */ + FlexBuf fbParamName; /**< buffer for the current param's name. */ + FlexBuf fbParamValue; /**< buffer for the current param's value. */ + bool bStrip; /**< Are we stripping whitespace? */ + int nError; /**< Is there an error? 
*/ +}; + +#endif diff --git a/src/xmlstringreader.cpp b/src/xmlstringreader.cpp new file mode 100644 index 0000000..aa7174f --- /dev/null +++ b/src/xmlstringreader.cpp @@ -0,0 +1,37 @@ +#include "xmlstringreader.h" +#include <string.h> + +XmlStringReader::XmlStringReader( const char *sString, bool bStrip ) + : XmlReader( bStrip ) +{ + this->sString = sString; + + nIndex = 0; + nLength = strlen( sString ); + + buildDoc(); +} + +XmlStringReader::~XmlStringReader() +{ +} + +char XmlStringReader::getChar( int nAdd ) +{ + if( nLength >= nIndex+nAdd+1 ) + { + return sString[nIndex+nAdd]; + } + else + { + return '\0'; + } +} + +void XmlStringReader::usedChar() +{ + if( nLength >= nIndex+1 ) + { + nIndex++; + } +} diff --git a/src/xmlstringreader.h b/src/xmlstringreader.h new file mode 100644 index 0000000..07da83c --- /dev/null +++ b/src/xmlstringreader.h @@ -0,0 +1,49 @@ +#ifndef XMLSTRINGREADER +#define XMLSTRINGREADER + +#include <stdio.h> +#include "xmlreader.h" +#include "flexbuf.h" + +/** + * Takes care of reading in xml formatted data from a file. This could/should + * be made more arbitrary in the future so that we can read the data from any + * source. This is actually made quite simple already since all data read in + * is handled by one single helper function and then palced into a FlexBuf for + * easy access by the other functions. The FlexBuf also allows for block + * reading from disk, which improves speed by a noticable amount. + * <br> + * There are also some extra features implemented that allow you to break the + * standard XML reader specs and eliminate leading and trailing whitespace in + * all read content. This is useful in situations where you allow additional + * whitespace in the files to make them easily human readable. The resturned + * content will be NULL in sitautions where all content between nodes was + * stripped. 
+ *@author Mike Buland + */ +class XmlStringReader : public XmlReader +{ +public: + /** + * Create a new string reader around an already created and formatted + * null-terminated string. + *@param sString A pointer to the string data that will be used. This data + * is not changed during processing. + *@param bStrip Strip out leading and trailing whitespace. + */ + XmlStringReader( const char *sString, bool bStrip=false ); + + /** + * Destroy this string reader. + */ + ~XmlStringReader(); + +private: + char getChar( int nIndex = 0 ); + void usedChar(); + const char *sString; /**< Internal pointer to the input string. */ + int nIndex; /**< Our index into the string */ + int nLength; /**< The computed length of the string */ +}; + +#endif diff --git a/src/xmlstringwriter.cpp b/src/xmlstringwriter.cpp new file mode 100644 index 0000000..adeed6a --- /dev/null +++ b/src/xmlstringwriter.cpp @@ -0,0 +1,23 @@ +#include <stdio.h> +#include <stdlib.h> +#include "xmlstringwriter.h" + +XmlStringWriter::XmlStringWriter( const char *sIndent ) : + XmlWriter( sIndent ) +{ +} + +XmlStringWriter::~XmlStringWriter() +{ +} + +void XmlStringWriter::writeString( const char *sString ) +{ + sXml += sString; +} + +std::string &XmlStringWriter::getString() +{ + return sXml; +} + diff --git a/src/xmlstringwriter.h b/src/xmlstringwriter.h new file mode 100644 index 0000000..530db3e --- /dev/null +++ b/src/xmlstringwriter.h @@ -0,0 +1,50 @@ +#ifndef XML_STRING_WRITER +#define XML_STRING_WRITER + +#include "xmlnode.h" +#include "xmlwriter.h" + +/** + * Implements xml writing in the XML standard format. Also allows you to + * break that format and auto-indent your exported xml data for ease of + * reading. The auto-indenting will only be applied to sections that + * have no content of their own already. This means that except for + * whitespace all of your data will be preserved perfectly. 
+ * You can create an XmlWriter object around a file, or access the static + * write function directly and just hand it a filename and a root XmlNode. + * When using an XmlWriter object the interface is identicle to that of + * the XmlDocument class, so reference that class for API info. However + * when the initial (or root) node is closed, and the document is finished + * the file will be created and written to automatically. The user can + * check to see if this is actually true by calling the isFinished + * function in the XmlDocument class. + *@author Mike Buland + */ +class XmlStringWriter : public XmlWriter +{ +public: + /** + * Construct a string writer using an internal string buffer. + *@param sIndent Optional indent to add to each line. + */ + XmlStringWriter( const char *sIndent=NULL ); + + /** + * Destroy the string writer and the internal string. + */ + ~XmlStringWriter(); + + /** + * Get the string that was built. This is only valid after the document has + * been completed, so check isCompleted or be sure your addNode and + * closeNode calls match up. + *@returns A reference to the internal string object. + */ + std::string &getString(); + +private: + void writeString( const char *sString ); + std::string sXml; /**< The string object we "write" to. 
*/
+};
+
+#endif
diff --git a/src/xmlwriter.cpp b/src/xmlwriter.cpp
new file mode 100644
index 0000000..236939d
--- /dev/null
+++ b/src/xmlwriter.cpp
@@ -0,0 +1,173 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "xmlwriter.h"
+
+XmlWriter::XmlWriter( const char *sIndent, XmlNode *pRoot ) :
+    XmlDocument( pRoot )
+{
+    if( sIndent == NULL )
+    {
+        this->sIndent = "";
+    }
+    else
+    {
+        this->sIndent = sIndent;
+    }
+}
+
+XmlWriter::~XmlWriter()
+{
+}
+
+void XmlWriter::write()
+{
+    write( getRoot(), sIndent.c_str() );
+}
+
+void XmlWriter::write( XmlNode *pRoot, const char *sIndent )
+{
+    writeNode( pRoot, 0, sIndent );
+}
+
+void XmlWriter::closeNode()
+{
+    XmlDocument::closeNode();
+
+    if( isCompleted() )
+    {
+        write( getRoot(), sIndent.c_str() );
+    }
+}
+
+void XmlWriter::writeIndent( int nIndent, const char *sIndent )
+{
+    if( sIndent == NULL ) return;
+    for( int j = 0; j < nIndent; j++ )
+    {
+        writeString( sIndent );
+    }
+}
+
+/**
+ * Escape a property value for writing inside double quotes. Characters from
+ * ' ' through '9' and ASCII letters are copied as-is, except for the quote
+ * characters and '&'; everything else is written as a decimal "&#N;" escape.
+ *@param sIn The raw value.
+ *@returns The escaped value.
+ */
+std::string XmlWriter::escape( std::string sIn )
+{
+    std::string sOut;
+
+    for( std::string::const_iterator i = sIn.begin(); i != sIn.end(); ++i )
+    {
+        const char c = *i;
+        // '&' lies inside the ' '..'9' range but starts an escape sequence,
+        // so it must be escaped itself or the value cannot be read back.
+        const bool bPlain =
+            ( (c >= ' ' && c <= '9') ||
+              (c >= 'a' && c <= 'z') ||
+              (c >= 'A' && c <= 'Z') ) &&
+            c != '\"' && c != '\'' && c != '&';
+
+        if( bPlain )
+        {
+            sOut += c;
+        }
+        else
+        {
+            // Format the byte as an unsigned value (0-255) so that chars
+            // with the high bit set do not print as negative numbers, and
+            // bound the write to the buffer size.
+            char buf[8];
+            snprintf( buf, sizeof(buf), "%d", (int)(unsigned char)c );
+            sOut += "&#";
+            sOut += buf;
+            sOut += ';';
+        }
+    }
+
+    return sOut;
+}
+
+void XmlWriter::writeNodeProps( XmlNode *pNode, int nIndent, const char *sIndent )
+{
+    for( int j = 0; j < pNode->getNumProperties(); j++ )
+    {
+        writeString(" ");
+        writeString( pNode->getPropertyName( j ) );
+        writeString("=\"");
+        writeString( escape( pNode->getProperty( j ) ).c_str() );
+        writeString("\"");
+    }
+}
+
+void XmlWriter::writeNode( XmlNode *pNode, int nIndent, const char *sIndent )
+{
+    if( pNode->hasChildren() )
+    {
+        writeIndent( nIndent, sIndent );
+        writeString("<");
+        writeString( pNode->getName() );
+        writeNodeProps( pNode, nIndent, sIndent );
+        if( sIndent )
+            writeString(">\n");
+ else + writeString(">"); + + if( pNode->getContent( 0 ) ) + { + writeIndent( nIndent+1, sIndent ); + if( sIndent ) + { + writeString( pNode->getContent( 0 ) ); + writeString("\n"); + } + else + writeString( pNode->getContent( 0 ) ); + } + + int nNumChildren = pNode->getNumChildren(); + for( int j = 0; j < nNumChildren; j++ ) + { + writeNode( pNode->getChild( j ), nIndent+1, sIndent ); + if( pNode->getContent( j+1 ) ) + { + writeIndent( nIndent+1, sIndent ); + if( sIndent ) + { + writeString( pNode->getContent( j+1 ) ); + writeString("\n"); + } + else + writeString( pNode->getContent( j+1 ) ); + } + } + + writeIndent( nIndent, sIndent ); + if( sIndent ) + { + writeString("</"); + writeString( pNode->getName() ); + writeString(">\n"); + } + else + { + writeString("</"); + writeString( pNode->getName() ); + writeString(">"); + } + } + else if( pNode->getContent() ) + { + writeIndent( nIndent, sIndent ); + writeString("<"); + writeString( pNode->getName() ); + writeNodeProps( pNode, nIndent, sIndent ); + writeString(">"); + writeString( pNode->getContent() ); + writeString("</"); + writeString( pNode->getName() ); + writeString(">"); + if( sIndent ) + writeString("\n"); + } + else + { + writeIndent( nIndent, sIndent ); + writeString("<"); + writeString( pNode->getName() ); + writeNodeProps( pNode, nIndent, sIndent ); + if( sIndent ) + writeString("/>\n"); + else + writeString("/>"); + } +} + diff --git a/src/xmlwriter.h b/src/xmlwriter.h new file mode 100644 index 0000000..5bc3f0a --- /dev/null +++ b/src/xmlwriter.h @@ -0,0 +1,96 @@ +#ifndef XMLWRITER +#define XMLWRITER + +#include "xmlnode.h" +#include "xmldocument.h" + +/** + * Implements xml writing in the XML standard format. Also allows you to + * break that format and auto-indent your exported xml data for ease of + * reading. The auto-indenting will only be applied to sections that + * have no content of their own already. This means that except for + * whitespace all of your data will be preserved perfectly. 
+ * XmlWriter is abstract: derive from it and implement writeString() to
+ * send the generated text to a file or to any other kind of output.
+ * When using an XmlWriter object the interface is identical to that of
+ * the XmlDocument class, so reference that class for API info. However
+ * when the initial (or root) node is closed, and the document is finished
+ * the document is written out through writeString() automatically. The
+ * user can check to see if this has happened by calling the isCompleted()
+ * function in the XmlDocument class.
+ *@author Mike Buland
+ */
+class XmlWriter : public XmlDocument
+{
+public:
+	/**
+	 * Construct a standard XmlWriter.
+	 *@param sIndent Set this to something other than NULL to include it as an
+	 * indent before each node in the output that doesn't already have content.
+	 * If you use the whitespace stripping option in the XmlReader, a tab or
+	 * some spaces here will never affect the content of your file.
+	 *@param pRoot An optional root node, passed on to XmlDocument.
+	 */
+	XmlWriter( const char *sIndent=NULL, XmlNode *pRoot=NULL );
+
+	/**
+	 * Destroy the writer. Virtual: XmlWriter is abstract, only a base class.
+	 */
+	virtual ~XmlWriter();
+
+	/**
+	 * This override of the parent class closeNode function calls the parent
+	 * class, but also triggers a write operation when the final node is closed.
+	 * This means that by checking the isCompleted() function the user may also
+	 * check to see if their file has been written or not.
+	 */
+	void closeNode();
+
+	void write();	/**< Write the document now, from the root node. */
+
+private:
+	std::string sIndent;	/**< The indent string */
+
+	std::string escape( std::string sIn );	/**< Escape a property value. */
+
+	/**
+	 * Write the document, starting at the given node.
+	 *@param pNode The root node
+	 *@param sIndent The indent text.
+	 */
+	void write( XmlNode *pNode, const char *sIndent=NULL );
+
+	/**
+	 * Write a node in the file, including children.
+	 *@param pNode The node to write.
+	 *@param nIndent The indent level (the number of times to include sIndent)
+	 *@param sIndent The indent text.
+	 */
+	void writeNode( XmlNode *pNode, int nIndent, const char *sIndent );
+
+	/**
+	 * Write the properties of a node.
+	 *@param pNode The node whose properties to write.
+	 *@param nIndent The indent level of the containing node
+	 *@param sIndent The indent text.
+	 */
+	void writeNodeProps( XmlNode *pNode, int nIndent, const char *sIndent );
+
+	/**
+	 * Called to write the actual indent.
+	 *@param nIndent The indent level.
+	 *@param sIndent The indent text.
+	 */
+	void writeIndent( int nIndent, const char *sIndent );
+
+	/**
+	 * This is the function that must be overridden in order to use this class.
+	 * It must write the null-terminated string sString, minus the null,
+	 * verbatim to its output device. Adding extra characters for any reason
+	 * will break the XML formatting.
+	 *@param sString The string data to write to the output.
+	 */
+	virtual void writeString( const char *sString ) = 0;
+};
+
+#endif
-- 
cgit v1.2.3