|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object | +--org.w3c.tidy.Tidy
HTML parser and pretty printer
(c) 1998-2000 (W3C) MIT, INRIA, Keio University. See Tidy.java for the copyright notice. Derived from HTML Tidy Release 4 Aug 2000.
Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts Institute of Technology, Institut National de Recherche en Informatique et en Automatique, Keio University). All Rights Reserved.
Contributing Author(s):
Dave Raggett
Andy Quick (translation to Java)
The contributing author(s) would like to thank all those who helped with testing, bug fixes, and patience. This wouldn't have been possible without all of you.
COPYRIGHT NOTICE:
This software and documentation is provided "as is," and
the copyright holders and contributing author(s) make no
representations or warranties, express or implied, including
but not limited to, warranties of merchantability or fitness
for any particular purpose or that the use of the software or
documentation will not infringe any third party patents,
copyrights, trademarks or other rights.
The copyright holders and contributing author(s) will not be liable for any direct, indirect, special or consequential damages arising out of any use of the software or documentation, even if advised of the possibility of such damage.
Permission is hereby granted to use, copy, modify, and distribute this source code, or portions hereof, documentation and executables, for any purpose, without fee, subject to the following restrictions:
The copyright holders and contributing author(s) specifically permit, without fee, and encourage the use of this source code as a component for supporting the Hypertext Markup Language in commercial products. If you use this source code in a product, acknowledgment is not required but would be appreciated.
Constructor Summary | |
Tidy()
|
|
Tidy(boolean configLogger)
|
Method Summary | |
static Document |
createEmptyDocument()
Creates an empty DOM Document. |
String |
getAltText()
|
boolean |
getBreakBeforeBR()
|
boolean |
getBurstSlides()
|
int |
getCharEncoding()
|
org.w3c.tidy.Configuration |
getConfiguration()
|
String |
getDocType()
|
boolean |
getDropEmptyParas()
|
boolean |
getDropFontTags()
|
boolean |
getEncloseBlockText()
|
boolean |
getEncloseText()
|
boolean |
getFixBackslash()
|
boolean |
getFixComments()
|
boolean |
getHideEndTags()
|
boolean |
getIndentAttributes()
|
boolean |
getIndentContent()
|
String |
getInputStreamName()
|
boolean |
getKeepFileTimes()
|
boolean |
getLiteralAttribs()
|
boolean |
getLogicalEmphasis()
|
boolean |
getMakeClean()
|
boolean |
getNumEntities()
|
int |
getParseErrors()
ParseErrors - the number of errors that occurred in the most recent parse operation |
int |
getParseWarnings()
ParseWarnings - the number of warnings that occurred in the most recent parse operation |
boolean |
getQuoteAmpersand()
|
boolean |
getQuoteMarks()
|
boolean |
getQuoteNbsp()
|
boolean |
getRawOut()
|
String |
getSlidestyle()
|
boolean |
getSmartIndent()
|
int |
getSpaces()
|
int |
getTabsize()
|
boolean |
getTidyMark()
|
boolean |
getUpperCaseAttrs()
|
boolean |
getUpperCaseTags()
|
boolean |
getWord2000()
|
boolean |
getWrapAsp()
|
boolean |
getWrapAttVals()
|
boolean |
getWrapJste()
|
int |
getWraplen()
|
boolean |
getWrapPhp()
|
boolean |
getWrapScriptlets()
|
boolean |
getWrapSection()
|
boolean |
getWriteback()
|
boolean |
getXHTML()
|
boolean |
getXmlOut()
|
boolean |
getXmlPi()
|
boolean |
getXmlPIs()
|
boolean |
getXmlSpace()
|
boolean |
getXmlTags()
|
static void |
main(String[] argv)
Command line interface to parser and pretty printer. |
org.w3c.tidy.Node |
parse(InputStream in,
OutputStream out)
Parses InputStream in and returns the root Node. |
Document |
parseDOM(InputStream in,
OutputStream out)
Parses InputStream in and returns a DOM Document node. |
void |
pprint(Document doc,
OutputStream out)
Pretty-prints a DOM Document. |
void |
setAltText(String altText)
AltText - default text for alt attribute |
void |
setBreakBeforeBR(boolean BreakBeforeBR)
BreakBeforeBR - whether to output a newline before <br> |
void |
setBurstSlides(boolean BurstSlides)
BurstSlides - create slides on each h2 element |
void |
setCharEncoding(int charencoding)
CharEncoding |
void |
setConfigurationFromFile(String filename)
Sets the configuration from a configuration file. |
void |
setConfigurationFromProps(Properties props)
Sets the configuration from a properties object. |
void |
setDocType(String doctype)
DocType - user-specified doctype: omit | auto | strict | loose | fpi, where the fpi is a string similar to "-//ACME//DTD HTML 3.14159//EN". Note: for fpi, include the double quotes in the string. |
void |
setDropEmptyParas(boolean DropEmptyParas)
DropEmptyParas - discard empty p elements |
void |
setDropFontTags(boolean DropFontTags)
DropFontTags - discard presentation tags |
void |
setEncloseBlockText(boolean EncloseBlockText)
EncloseBlockText - if true text in blocks is wrapped in <p>'s |
void |
setEncloseText(boolean EncloseText)
EncloseText - if true text at body is wrapped in <p>'s |
void |
setFixBackslash(boolean FixBackslash)
FixBackslash - fix URLs by replacing \ with / |
void |
setFixComments(boolean FixComments)
FixComments - fix comments with adjacent hyphens |
void |
setHideEndTags(boolean HideEndTags)
HideEndTags - suppress optional end tags |
void |
setIndentAttributes(boolean IndentAttributes)
IndentAttributes - newline+indent before each attribute |
void |
setIndentContent(boolean IndentContent)
IndentContent - indent content of appropriate tags |
void |
setInputStreamName(String name)
InputStreamName - the name of the input stream (printed in the header information). |
void |
setKeepFileTimes(boolean KeepFileTimes)
KeepFileTimes - if true, the last modified time is preserved. NOTE: this is NOT supported at this time. |
void |
setLiteralAttribs(boolean LiteralAttribs)
LiteralAttribs - if true attributes may use newlines |
void |
setLogicalEmphasis(boolean LogicalEmphasis)
LogicalEmphasis - replace i by em and b by strong |
void |
setMakeClean(boolean MakeClean)
MakeClean - remove presentational clutter |
void |
setNumEntities(boolean NumEntities)
NumEntities - use numeric entities |
void |
setQuoteAmpersand(boolean QuoteAmpersand)
QuoteAmpersand - output naked ampersand as &amp; |
void |
setQuoteMarks(boolean QuoteMarks)
QuoteMarks - output " marks as &quot; |
void |
setQuoteNbsp(boolean QuoteNbsp)
QuoteNbsp - output non-breaking space as entity |
void |
setRawOut(boolean RawOut)
RawOut - avoid mapping values > 127 to entities |
void |
setSlidestyle(String slidestyle)
Slidestyle - style sheet for slides |
void |
setSmartIndent(boolean SmartIndent)
SmartIndent - does text/block level content affect indentation |
void |
setSpaces(int spaces)
Spaces - default indentation |
void |
setTabsize(int tabsize)
Tabsize |
void |
setTidyMark(boolean TidyMark)
TidyMark - add meta element indicating tidied doc |
void |
setUpperCaseAttrs(boolean UpperCaseAttrs)
UpperCaseAttrs - output attributes in upper not lower case |
void |
setUpperCaseTags(boolean UpperCaseTags)
UpperCaseTags - output tags in upper not lower case |
void |
setWord2000(boolean Word2000)
Word2000 - draconian cleaning for Word2000 |
void |
setWrapAsp(boolean WrapAsp)
WrapAsp - wrap within ASP pseudo elements |
void |
setWrapAttVals(boolean WrapAttVals)
WrapAttVals - wrap within attribute values |
void |
setWrapJste(boolean WrapJste)
WrapJste - wrap within JSTE pseudo elements |
void |
setWraplen(int wraplen)
Wraplen - default wrap margin |
void |
setWrapPhp(boolean WrapPhp)
WrapPhp - wrap within PHP pseudo elements |
void |
setWrapScriptlets(boolean WrapScriptlets)
WrapScriptlets - wrap within JavaScript string literals |
void |
setWrapSection(boolean WrapSection)
WrapSection - wrap within <![ ... ]> section tags |
void |
setWriteback(boolean writeback)
Writeback - if true, then output tidied markup. NOTE: this property is ignored when parsing from an InputStream. |
void |
setXHTML(boolean xHTML)
XHTML - output extensible HTML |
void |
setXmlOut(boolean XmlOut)
XmlOut - create output as XML |
void |
setXmlPi(boolean XmlPi)
XmlPi - add <?xml?> for XML docs |
void |
setXmlPIs(boolean XmlPIs)
XmlPIs - if set to true, PIs must end with ?> |
void |
setXmlSpace(boolean XmlSpace)
XmlSpace - if set to true, adds the xml:space attribute as needed |
void |
setXmlTags(boolean XmlTags)
XmlTags - treat input as XML |
Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Constructor Detail |
public Tidy()
public Tidy(boolean configLogger)
Method Detail |
public org.w3c.tidy.Configuration getConfiguration()
public int getParseErrors()
public int getParseWarnings()
public void setSpaces(int spaces)
Configuration.spaces
public int getSpaces()
public void setWraplen(int wraplen)
Configuration.wraplen
public int getWraplen()
public void setCharEncoding(int charencoding)
Configuration.CharEncoding
public int getCharEncoding()
public void setTabsize(int tabsize)
Configuration.tabsize
public int getTabsize()
public void setWriteback(boolean writeback)
Configuration.writeback
public boolean getWriteback()
public void setIndentContent(boolean IndentContent)
Configuration.IndentContent
public boolean getIndentContent()
public void setSmartIndent(boolean SmartIndent)
Configuration.SmartIndent
public boolean getSmartIndent()
public void setHideEndTags(boolean HideEndTags)
Configuration.HideEndTags
public boolean getHideEndTags()
public void setXmlTags(boolean XmlTags)
Configuration.XmlTags
public boolean getXmlTags()
public void setXmlOut(boolean XmlOut)
Configuration.XmlOut
public boolean getXmlOut()
public void setXHTML(boolean xHTML)
Configuration.xHTML
public boolean getXHTML()
public void setRawOut(boolean RawOut)
Configuration.RawOut
public boolean getRawOut()
public void setUpperCaseTags(boolean UpperCaseTags)
Configuration.UpperCaseTags
public boolean getUpperCaseTags()
public void setUpperCaseAttrs(boolean UpperCaseAttrs)
Configuration.UpperCaseAttrs
public boolean getUpperCaseAttrs()
public void setMakeClean(boolean MakeClean)
Configuration.MakeClean
public boolean getMakeClean()
public void setBreakBeforeBR(boolean BreakBeforeBR)
Configuration.BreakBeforeBR
public boolean getBreakBeforeBR()
public void setBurstSlides(boolean BurstSlides)
Configuration.BurstSlides
public boolean getBurstSlides()
public void setNumEntities(boolean NumEntities)
Configuration.NumEntities
public boolean getNumEntities()
public void setQuoteMarks(boolean QuoteMarks)
Configuration.QuoteMarks
public boolean getQuoteMarks()
public void setQuoteNbsp(boolean QuoteNbsp)
Configuration.QuoteNbsp
public boolean getQuoteNbsp()
public void setQuoteAmpersand(boolean QuoteAmpersand)
Configuration.QuoteAmpersand
public boolean getQuoteAmpersand()
public void setWrapAttVals(boolean WrapAttVals)
Configuration.WrapAttVals
public boolean getWrapAttVals()
public void setWrapScriptlets(boolean WrapScriptlets)
Configuration.WrapScriptlets
public boolean getWrapScriptlets()
public void setWrapSection(boolean WrapSection)
Configuration.WrapSection
public boolean getWrapSection()
public void setAltText(String altText)
Configuration.altText
public String getAltText()
public void setSlidestyle(String slidestyle)
Configuration.slidestyle
public String getSlidestyle()
public void setXmlPi(boolean XmlPi)
Configuration.XmlPi
public boolean getXmlPi()
public void setDropFontTags(boolean DropFontTags)
Configuration.DropFontTags
public boolean getDropFontTags()
public void setDropEmptyParas(boolean DropEmptyParas)
Configuration.DropEmptyParas
public boolean getDropEmptyParas()
public void setFixComments(boolean FixComments)
Configuration.FixComments
public boolean getFixComments()
public void setWrapAsp(boolean WrapAsp)
Configuration.WrapAsp
public boolean getWrapAsp()
public void setWrapJste(boolean WrapJste)
Configuration.WrapJste
public boolean getWrapJste()
public void setWrapPhp(boolean WrapPhp)
Configuration.WrapPhp
public boolean getWrapPhp()
public void setFixBackslash(boolean FixBackslash)
Configuration.FixBackslash
public boolean getFixBackslash()
public void setIndentAttributes(boolean IndentAttributes)
Configuration.IndentAttributes
public boolean getIndentAttributes()
public void setDocType(String doctype)
Configuration.docTypeStr, Configuration.docTypeMode
public String getDocType()
public void setLogicalEmphasis(boolean LogicalEmphasis)
Configuration.LogicalEmphasis
public boolean getLogicalEmphasis()
public void setXmlPIs(boolean XmlPIs)
Configuration.XmlPIs
public boolean getXmlPIs()
public void setEncloseText(boolean EncloseText)
Configuration.EncloseBodyText
public boolean getEncloseText()
public void setEncloseBlockText(boolean EncloseBlockText)
Configuration.EncloseBlockText
public boolean getEncloseBlockText()
public void setKeepFileTimes(boolean KeepFileTimes)
Configuration.KeepFileTimes
public boolean getKeepFileTimes()
public void setWord2000(boolean Word2000)
Configuration.Word2000
public boolean getWord2000()
public void setTidyMark(boolean TidyMark)
Configuration.TidyMark
public boolean getTidyMark()
public void setXmlSpace(boolean XmlSpace)
Configuration.XmlSpace
public boolean getXmlSpace()
public void setLiteralAttribs(boolean LiteralAttribs)
Configuration.LiteralAttribs
public boolean getLiteralAttribs()
public void setInputStreamName(String name)
public String getInputStreamName()
public void setConfigurationFromFile(String filename)
public void setConfigurationFromProps(Properties props)
public org.w3c.tidy.Node parse(InputStream in, OutputStream out)
public Document parseDOM(InputStream in, OutputStream out)
public static Document createEmptyDocument()
public void pprint(Document doc, OutputStream out)
public static void main(String[] argv)
|
|||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |