forked from wallabag/wallabag
first commit
This commit is contained in:
262
inc/Encoding.php
Executable file
262
inc/Encoding.php
Executable file
@ -0,0 +1,262 @@
|
||||
<?php
|
||||
/**
|
||||
* @author "Sebastián Grignoli" <grignoli@framework2.com.ar>
|
||||
* @package Encoding
|
||||
* @version 1.1
|
||||
* @link http://www.framework2.com.ar/dzone/forceUTF8-es/
|
||||
* @example http://www.framework2.com.ar/dzone/forceUTF8-es/
|
||||
*/
|
||||
|
||||
class Encoding {
|
||||
|
||||
protected static $win1252ToUtf8 = array(
|
||||
128 => "\xe2\x82\xac",
|
||||
|
||||
130 => "\xe2\x80\x9a",
|
||||
131 => "\xc6\x92",
|
||||
132 => "\xe2\x80\x9e",
|
||||
133 => "\xe2\x80\xa6",
|
||||
134 => "\xe2\x80\xa0",
|
||||
135 => "\xe2\x80\xa1",
|
||||
136 => "\xcb\x86",
|
||||
137 => "\xe2\x80\xb0",
|
||||
138 => "\xc5\xa0",
|
||||
139 => "\xe2\x80\xb9",
|
||||
140 => "\xc5\x92",
|
||||
|
||||
142 => "\xc5\xbd",
|
||||
|
||||
|
||||
145 => "\xe2\x80\x98",
|
||||
146 => "\xe2\x80\x99",
|
||||
147 => "\xe2\x80\x9c",
|
||||
148 => "\xe2\x80\x9d",
|
||||
149 => "\xe2\x80\xa2",
|
||||
150 => "\xe2\x80\x93",
|
||||
151 => "\xe2\x80\x94",
|
||||
152 => "\xcb\x9c",
|
||||
153 => "\xe2\x84\xa2",
|
||||
154 => "\xc5\xa1",
|
||||
155 => "\xe2\x80\xba",
|
||||
156 => "\xc5\x93",
|
||||
|
||||
158 => "\xc5\xbe",
|
||||
159 => "\xc5\xb8"
|
||||
);
|
||||
|
||||
protected static $brokenUtf8ToUtf8 = array(
|
||||
"\xc2\x80" => "\xe2\x82\xac",
|
||||
|
||||
"\xc2\x82" => "\xe2\x80\x9a",
|
||||
"\xc2\x83" => "\xc6\x92",
|
||||
"\xc2\x84" => "\xe2\x80\x9e",
|
||||
"\xc2\x85" => "\xe2\x80\xa6",
|
||||
"\xc2\x86" => "\xe2\x80\xa0",
|
||||
"\xc2\x87" => "\xe2\x80\xa1",
|
||||
"\xc2\x88" => "\xcb\x86",
|
||||
"\xc2\x89" => "\xe2\x80\xb0",
|
||||
"\xc2\x8a" => "\xc5\xa0",
|
||||
"\xc2\x8b" => "\xe2\x80\xb9",
|
||||
"\xc2\x8c" => "\xc5\x92",
|
||||
|
||||
"\xc2\x8e" => "\xc5\xbd",
|
||||
|
||||
|
||||
"\xc2\x91" => "\xe2\x80\x98",
|
||||
"\xc2\x92" => "\xe2\x80\x99",
|
||||
"\xc2\x93" => "\xe2\x80\x9c",
|
||||
"\xc2\x94" => "\xe2\x80\x9d",
|
||||
"\xc2\x95" => "\xe2\x80\xa2",
|
||||
"\xc2\x96" => "\xe2\x80\x93",
|
||||
"\xc2\x97" => "\xe2\x80\x94",
|
||||
"\xc2\x98" => "\xcb\x9c",
|
||||
"\xc2\x99" => "\xe2\x84\xa2",
|
||||
"\xc2\x9a" => "\xc5\xa1",
|
||||
"\xc2\x9b" => "\xe2\x80\xba",
|
||||
"\xc2\x9c" => "\xc5\x93",
|
||||
|
||||
"\xc2\x9e" => "\xc5\xbe",
|
||||
"\xc2\x9f" => "\xc5\xb8"
|
||||
);
|
||||
|
||||
protected static $utf8ToWin1252 = array(
|
||||
"\xe2\x82\xac" => "\x80",
|
||||
|
||||
"\xe2\x80\x9a" => "\x82",
|
||||
"\xc6\x92" => "\x83",
|
||||
"\xe2\x80\x9e" => "\x84",
|
||||
"\xe2\x80\xa6" => "\x85",
|
||||
"\xe2\x80\xa0" => "\x86",
|
||||
"\xe2\x80\xa1" => "\x87",
|
||||
"\xcb\x86" => "\x88",
|
||||
"\xe2\x80\xb0" => "\x89",
|
||||
"\xc5\xa0" => "\x8a",
|
||||
"\xe2\x80\xb9" => "\x8b",
|
||||
"\xc5\x92" => "\x8c",
|
||||
|
||||
"\xc5\xbd" => "\x8e",
|
||||
|
||||
|
||||
"\xe2\x80\x98" => "\x91",
|
||||
"\xe2\x80\x99" => "\x92",
|
||||
"\xe2\x80\x9c" => "\x93",
|
||||
"\xe2\x80\x9d" => "\x94",
|
||||
"\xe2\x80\xa2" => "\x95",
|
||||
"\xe2\x80\x93" => "\x96",
|
||||
"\xe2\x80\x94" => "\x97",
|
||||
"\xcb\x9c" => "\x98",
|
||||
"\xe2\x84\xa2" => "\x99",
|
||||
"\xc5\xa1" => "\x9a",
|
||||
"\xe2\x80\xba" => "\x9b",
|
||||
"\xc5\x93" => "\x9c",
|
||||
|
||||
"\xc5\xbe" => "\x9e",
|
||||
"\xc5\xb8" => "\x9f"
|
||||
);
|
||||
|
||||
static function toUTF8($text){
|
||||
/**
|
||||
* Function Encoding::toUTF8
|
||||
*
|
||||
* This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
|
||||
*
|
||||
* It assumes that the encoding of the original string is either Windows-1252 or ISO 8859-1.
|
||||
*
|
||||
* It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
|
||||
*
|
||||
* 1) when any of these characters: ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞß
|
||||
* are followed by any of these: ("group B")
|
||||
* ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶•¸¹º»¼½¾¿
|
||||
* For example: %ABREPRESENT%C9%BB. «REPRESENTÉ»
|
||||
* The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
|
||||
* is also a valid unicode character, and will be left unchanged.
|
||||
*
|
||||
* 2) when any of these: àáâãäåæçèéêëìíîï are followed by TWO chars from group B,
|
||||
* 3) when any of these: ðñòó are followed by THREE chars from group B.
|
||||
*
|
||||
* @name toUTF8
|
||||
* @param string $text Any string.
|
||||
* @return string The same string, UTF8 encoded
|
||||
*
|
||||
*/
|
||||
|
||||
if(is_array($text))
|
||||
{
|
||||
foreach($text as $k => $v)
|
||||
{
|
||||
$text[$k] = self::toUTF8($v);
|
||||
}
|
||||
return $text;
|
||||
} elseif(is_string($text)) {
|
||||
|
||||
$max = strlen($text);
|
||||
$buf = "";
|
||||
for($i = 0; $i < $max; $i++){
|
||||
$c1 = $text{$i};
|
||||
if($c1>="\xc0"){ //Should be converted to UTF8, if it's not UTF8 already
|
||||
$c2 = $i+1 >= $max? "\x00" : $text{$i+1};
|
||||
$c3 = $i+2 >= $max? "\x00" : $text{$i+2};
|
||||
$c4 = $i+3 >= $max? "\x00" : $text{$i+3};
|
||||
if($c1 >= "\xc0" & $c1 <= "\xdf"){ //looks like 2 bytes UTF8
|
||||
if($c2 >= "\x80" && $c2 <= "\xbf"){ //yeah, almost sure it's UTF8 already
|
||||
$buf .= $c1 . $c2;
|
||||
$i++;
|
||||
} else { //not valid UTF8. Convert it.
|
||||
$cc1 = (chr(ord($c1) / 64) | "\xc0");
|
||||
$cc2 = ($c1 & "\x3f") | "\x80";
|
||||
$buf .= $cc1 . $cc2;
|
||||
}
|
||||
} elseif($c1 >= "\xe0" & $c1 <= "\xef"){ //looks like 3 bytes UTF8
|
||||
if($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf"){ //yeah, almost sure it's UTF8 already
|
||||
$buf .= $c1 . $c2 . $c3;
|
||||
$i = $i + 2;
|
||||
} else { //not valid UTF8. Convert it.
|
||||
$cc1 = (chr(ord($c1) / 64) | "\xc0");
|
||||
$cc2 = ($c1 & "\x3f") | "\x80";
|
||||
$buf .= $cc1 . $cc2;
|
||||
}
|
||||
} elseif($c1 >= "\xf0" & $c1 <= "\xf7"){ //looks like 4 bytes UTF8
|
||||
if($c2 >= "\x80" && $c2 <= "\xbf" && $c3 >= "\x80" && $c3 <= "\xbf" && $c4 >= "\x80" && $c4 <= "\xbf"){ //yeah, almost sure it's UTF8 already
|
||||
$buf .= $c1 . $c2 . $c3;
|
||||
$i = $i + 2;
|
||||
} else { //not valid UTF8. Convert it.
|
||||
$cc1 = (chr(ord($c1) / 64) | "\xc0");
|
||||
$cc2 = ($c1 & "\x3f") | "\x80";
|
||||
$buf .= $cc1 . $cc2;
|
||||
}
|
||||
} else { //doesn't look like UTF8, but should be converted
|
||||
$cc1 = (chr(ord($c1) / 64) | "\xc0");
|
||||
$cc2 = (($c1 & "\x3f") | "\x80");
|
||||
$buf .= $cc1 . $cc2;
|
||||
}
|
||||
} elseif(($c1 & "\xc0") == "\x80"){ // needs conversion
|
||||
if(isset(self::$win1252ToUtf8[ord($c1)])) { //found in Windows-1252 special cases
|
||||
$buf .= self::$win1252ToUtf8[ord($c1)];
|
||||
} else {
|
||||
$cc1 = (chr(ord($c1) / 64) | "\xc0");
|
||||
$cc2 = (($c1 & "\x3f") | "\x80");
|
||||
$buf .= $cc1 . $cc2;
|
||||
}
|
||||
} else { // it doesn't need convesion
|
||||
$buf .= $c1;
|
||||
}
|
||||
}
|
||||
return $buf;
|
||||
} else {
|
||||
return $text;
|
||||
}
|
||||
}
|
||||
|
||||
static function toWin1252($text) {
|
||||
if(is_array($text)) {
|
||||
foreach($text as $k => $v) {
|
||||
$text[$k] = self::toWin1252($v);
|
||||
}
|
||||
return $text;
|
||||
} elseif(is_string($text)) {
|
||||
return utf8_decode(str_replace(array_keys(self::$utf8ToWin1252), array_values(self::$utf8ToWin1252), self::toUTF8($text)));
|
||||
} else {
|
||||
return $text;
|
||||
}
|
||||
}
|
||||
|
||||
static function toISO8859($text) {
|
||||
return self::toWin1252($text);
|
||||
}
|
||||
|
||||
static function toLatin1($text) {
|
||||
return self::toWin1252($text);
|
||||
}
|
||||
|
||||
static function fixUTF8($text){
|
||||
if(is_array($text)) {
|
||||
foreach($text as $k => $v) {
|
||||
$text[$k] = self::fixUTF8($v);
|
||||
}
|
||||
return $text;
|
||||
}
|
||||
|
||||
$last = "";
|
||||
while($last <> $text){
|
||||
$last = $text;
|
||||
$text = self::toUTF8(utf8_decode(str_replace(array_keys(self::$utf8ToWin1252), array_values(self::$utf8ToWin1252), $text)));
|
||||
}
|
||||
$text = self::toUTF8(utf8_decode(str_replace(array_keys(self::$utf8ToWin1252), array_values(self::$utf8ToWin1252), $text)));
|
||||
return $text;
|
||||
}
|
||||
|
||||
static function UTF8FixWin1252Chars($text){
|
||||
// If you received an UTF-8 string that was converted from Windows-1252 as it was ISO8859-1
|
||||
// (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
|
||||
// See: http://en.wikipedia.org/wiki/Windows-1252
|
||||
|
||||
return str_replace(array_keys(self::$brokenUtf8ToUtf8), array_values(self::$brokenUtf8ToUtf8), $text);
|
||||
}
|
||||
|
||||
static function removeBOM($str=""){
|
||||
if(substr($str, 0,3) == pack("CCC",0xef,0xbb,0xbf)) {
|
||||
$str=substr($str, 3);
|
||||
}
|
||||
return $str;
|
||||
}
|
||||
}
|
||||
110
inc/JSLikeHTMLElement.php
Executable file
110
inc/JSLikeHTMLElement.php
Executable file
@ -0,0 +1,110 @@
|
||||
<?php
|
||||
/**
|
||||
* JavaScript-like HTML DOM Element
|
||||
*
|
||||
* This class extends PHP's DOMElement to allow
|
||||
* users to get and set the innerHTML property of
|
||||
* HTML elements in the same way it's done in
|
||||
* JavaScript.
|
||||
*
|
||||
* Example usage:
|
||||
* @code
|
||||
* require_once 'JSLikeHTMLElement.php';
|
||||
* header('Content-Type: text/plain');
|
||||
* $doc = new DOMDocument();
|
||||
* $doc->registerNodeClass('DOMElement', 'JSLikeHTMLElement');
|
||||
* $doc->loadHTML('<div><p>Para 1</p><p>Para 2</p></div>');
|
||||
* $elem = $doc->getElementsByTagName('div')->item(0);
|
||||
*
|
||||
* // print innerHTML
|
||||
* echo $elem->innerHTML; // prints '<p>Para 1</p><p>Para 2</p>'
|
||||
* echo "\n\n";
|
||||
*
|
||||
* // set innerHTML
|
||||
* $elem->innerHTML = '<a href="http://fivefilters.org">FiveFilters.org</a>';
|
||||
* echo $elem->innerHTML; // prints '<a href="http://fivefilters.org">FiveFilters.org</a>'
|
||||
* echo "\n\n";
|
||||
*
|
||||
* // print document (with our changes)
|
||||
* echo $doc->saveXML();
|
||||
* @endcode
|
||||
*
|
||||
* @author Keyvan Minoukadeh - http://www.keyvan.net - keyvan@keyvan.net
|
||||
* @see http://fivefilters.org (the project this was written for)
|
||||
*/
|
||||
class JSLikeHTMLElement extends DOMElement
|
||||
{
|
||||
/**
|
||||
* Used for setting innerHTML like it's done in JavaScript:
|
||||
* @code
|
||||
* $div->innerHTML = '<h2>Chapter 2</h2><p>The story begins...</p>';
|
||||
* @endcode
|
||||
*/
|
||||
public function __set($name, $value) {
|
||||
if ($name == 'innerHTML') {
|
||||
// first, empty the element
|
||||
for ($x=$this->childNodes->length-1; $x>=0; $x--) {
|
||||
$this->removeChild($this->childNodes->item($x));
|
||||
}
|
||||
// $value holds our new inner HTML
|
||||
if ($value != '') {
|
||||
$f = $this->ownerDocument->createDocumentFragment();
|
||||
// appendXML() expects well-formed markup (XHTML)
|
||||
$result = @$f->appendXML($value); // @ to suppress PHP warnings
|
||||
if ($result) {
|
||||
if ($f->hasChildNodes()) $this->appendChild($f);
|
||||
} else {
|
||||
// $value is probably ill-formed
|
||||
$f = new DOMDocument();
|
||||
$value = mb_convert_encoding($value, 'HTML-ENTITIES', 'UTF-8');
|
||||
// Using <htmlfragment> will generate a warning, but so will bad HTML
|
||||
// (and by this point, bad HTML is what we've got).
|
||||
// We use it (and suppress the warning) because an HTML fragment will
|
||||
// be wrapped around <html><body> tags which we don't really want to keep.
|
||||
// Note: despite the warning, if loadHTML succeeds it will return true.
|
||||
$result = @$f->loadHTML('<htmlfragment>'.$value.'</htmlfragment>');
|
||||
if ($result) {
|
||||
$import = $f->getElementsByTagName('htmlfragment')->item(0);
|
||||
foreach ($import->childNodes as $child) {
|
||||
$importedNode = $this->ownerDocument->importNode($child, true);
|
||||
$this->appendChild($importedNode);
|
||||
}
|
||||
} else {
|
||||
// oh well, we tried, we really did. :(
|
||||
// this element is now empty
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
$trace = debug_backtrace();
|
||||
trigger_error('Undefined property via __set(): '.$name.' in '.$trace[0]['file'].' on line '.$trace[0]['line'], E_USER_NOTICE);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Used for getting innerHTML like it's done in JavaScript:
|
||||
* @code
|
||||
* $string = $div->innerHTML;
|
||||
* @endcode
|
||||
*/
|
||||
public function __get($name)
|
||||
{
|
||||
if ($name == 'innerHTML') {
|
||||
$inner = '';
|
||||
foreach ($this->childNodes as $child) {
|
||||
$inner .= $this->ownerDocument->saveXML($child);
|
||||
}
|
||||
return $inner;
|
||||
}
|
||||
|
||||
$trace = debug_backtrace();
|
||||
trigger_error('Undefined property via __get(): '.$name.' in '.$trace[0]['file'].' on line '.$trace[0]['line'], E_USER_NOTICE);
|
||||
return null;
|
||||
}
|
||||
|
||||
public function __toString()
|
||||
{
|
||||
return '['.$this->tagName.']';
|
||||
}
|
||||
}
|
||||
?>
|
||||
1103
inc/Readability.php
Executable file
1103
inc/Readability.php
Executable file
File diff suppressed because it is too large
Load Diff
0
inc/index.html
Executable file
0
inc/index.html
Executable file
1043
inc/rain.tpl.class.php
Executable file
1043
inc/rain.tpl.class.php
Executable file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user