/*
 * (C) Copyright 2006-2010 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Florent Guillaume
 */
package org.nuxeo.ecm.platform.htmlsanitizer;

import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.ecm.core.api.DocumentModel;
import org.nuxeo.ecm.core.api.model.Property;
import org.nuxeo.ecm.core.api.model.PropertyNotFoundException;
import org.nuxeo.runtime.model.ComponentInstance;
import org.nuxeo.runtime.model.DefaultComponent;
import org.owasp.validator.html.AntiSamy;
import org.owasp.validator.html.CleanResults;
import org.owasp.validator.html.Policy;
import org.owasp.validator.html.PolicyException;
import org.owasp.validator.html.ScanException;

/**
 * Service that sanitizes some HTML fields to remove potential cross-site scripting attacks in them.
 * <p>
 * Two extension points are handled: {@value #ANTISAMY_XP} contributes AntiSamy policy files and
 * {@value #SANITIZER_XP} contributes descriptors listing the document fields to sanitize.
 * <p>
 * NOTE(review): the registries below are plain collections and no synchronization is visible in this class;
 * presumably contributions are registered before the service is used concurrently -- confirm.
 */
public class HtmlSanitizerServiceImpl extends DefaultComponent implements HtmlSanitizerService {

    private static final Log log = LogFactory.getLog(HtmlSanitizerServiceImpl.class);

    public static final String ANTISAMY_XP = "antisamy";

    public static final String SANITIZER_XP = "sanitizer";

    /** All policies registered, in registration order; the last one is the effective one. */
    public LinkedList<HtmlSanitizerAntiSamyDescriptor> allPolicies = new LinkedList<>();

    /** Effective policy, or {@code null} when none is registered or the last one could not be parsed. */
    public Policy policy;

    /** All sanitizers registered. */
    public List<HtmlSanitizerDescriptor> allSanitizers = new ArrayList<>(1);

    /** Effective sanitizers (computed from {@link #allSanitizers} by {@link #refreshSanitizers()}). */
    public List<HtmlSanitizerDescriptor> sanitizers = new ArrayList<>(1);

    /**
     * Registers an AntiSamy policy or an HTML sanitizer descriptor, depending on the extension point.
     * <p>
     * A contribution of the wrong type, or an unknown extension point, is logged as an error and ignored.
     */
    @Override
    public void registerContribution(Object contribution, String extensionPoint, ComponentInstance contributor) {
        if (ANTISAMY_XP.equals(extensionPoint)) {
            if (!(contribution instanceof HtmlSanitizerAntiSamyDescriptor)) {
                log.error("Contribution " + contribution + " is not of type "
                        + HtmlSanitizerAntiSamyDescriptor.class.getName());
                return;
            }
            HtmlSanitizerAntiSamyDescriptor desc = (HtmlSanitizerAntiSamyDescriptor) contribution;
            log.info("Registering AntiSamy policy: " + desc.policy);
            addAntiSamy(desc);
        } else if (SANITIZER_XP.equals(extensionPoint)) {
            if (!(contribution instanceof HtmlSanitizerDescriptor)) {
                log.error("Contribution " + contribution + " is not of type " + HtmlSanitizerDescriptor.class.getName());
                return;
            }
            HtmlSanitizerDescriptor desc = (HtmlSanitizerDescriptor) contribution;
            log.info("Registering HTML sanitizer: " + desc);
            addSanitizer(desc);
        } else {
            // Both extension points are valid, so name both in the diagnostic.
            log.error("Contribution extension point should be '" + ANTISAMY_XP + "' or '" + SANITIZER_XP
                    + "' but is: " + extensionPoint);
        }
    }

    /**
     * Unregisters a previously registered AntiSamy policy or HTML sanitizer descriptor.
     * <p>
     * Contributions of an unexpected type and unknown extension points are silently ignored.
     */
    @Override
    public void unregisterContribution(Object contribution, String extensionPoint, ComponentInstance contributor) {
        if (ANTISAMY_XP.equals(extensionPoint)) {
            if (!(contribution instanceof HtmlSanitizerAntiSamyDescriptor)) {
                return;
            }
            HtmlSanitizerAntiSamyDescriptor desc = (HtmlSanitizerAntiSamyDescriptor) contribution;
            log.info("Unregistering AntiSamy policy: " + desc.policy);
            removeAntiSamy(desc);
        } else if (SANITIZER_XP.equals(extensionPoint)) {
            if (!(contribution instanceof HtmlSanitizerDescriptor)) {
                return;
            }
            HtmlSanitizerDescriptor desc = (HtmlSanitizerDescriptor) contribution;
            log.info("Unregistering HTML sanitizer: " + desc);
            removeSanitizer(desc);
        }
    }

    /**
     * Opens the given policy resource through the current thread's context class loader.
     *
     * @return the stream, or {@code null} if the class loader cannot find the resource
     */
    private static InputStream openPolicyStream(String policyPath) {
        return Thread.currentThread().getContextClassLoader().getResourceAsStream(policyPath);
    }

    /**
     * Adds a policy descriptor to the registry and makes it the effective policy.
     * <p>
     * If the policy resource cannot be found, an error is logged and the descriptor is not registered.
     */
    protected void addAntiSamy(HtmlSanitizerAntiSamyDescriptor desc) {
        // The stream is opened only to check that the resource exists; close it so the handle is not leaked.
        try (InputStream is = openPolicyStream(desc.policy)) {
            if (is == null) {
                log.error("Cannot find AntiSamy policy: " + desc.policy);
                return;
            }
        } catch (IOException e) {
            // Only close() can fail here; the resource was found, so registration proceeds.
            log.debug("Failed to close AntiSamy policy stream: " + desc.policy, e);
        }
        allPolicies.add(desc);
        refreshPolicy();
    }

    /**
     * Removes a policy descriptor from the registry and recomputes the effective policy from what remains.
     */
    protected void removeAntiSamy(HtmlSanitizerAntiSamyDescriptor desc) {
        allPolicies.remove(desc);
        refreshPolicy();
    }

    /**
     * Recomputes the effective policy: the last registered descriptor wins, or {@code null} if none is registered.
     *
     * @throws RuntimeException if the policy file of the last registered descriptor cannot be parsed; in that case
     *             the effective policy is set to {@code null} and the unparsable descriptor is dropped
     */
    protected void refreshPolicy() {
        if (allPolicies.isEmpty()) {
            policy = null;
            return;
        }
        // Peek, do not pop: the descriptor must stay registered so that a later unregistration of another
        // policy can still fall back to it (removeLast() here used to empty the registry on every refresh).
        HtmlSanitizerAntiSamyDescriptor desc = allPolicies.getLast();
        try (InputStream is = openPolicyStream(desc.policy)) {
            policy = Policy.getInstance(is);
        } catch (PolicyException e) {
            // Drop the unparsable descriptor so it does not make every subsequent refresh fail.
            allPolicies.removeLast();
            policy = null;
            throw new RuntimeException("Cannot parse AntiSamy policy: " + desc.policy, e);
        } catch (IOException e) {
            // Only close() can fail here; the policy has already been parsed and stays effective.
            log.debug("Failed to close AntiSamy policy stream: " + desc.policy, e);
        }
    }

    /**
     * @return the effective AntiSamy policy, possibly {@code null}
     */
    protected Policy getPolicy() {
        return policy;
    }

    /**
     * Registers a sanitizer descriptor and recomputes the effective sanitizers.
     * <p>
     * A descriptor without any field is logged as an error and not registered.
     */
    protected void addSanitizer(HtmlSanitizerDescriptor desc) {
        if (desc.fields.isEmpty()) {
            log.error("Sanitizer has no fields: " + desc);
            return;
        }
        allSanitizers.add(desc);
        refreshSanitizers();
    }

    /**
     * Unregisters a sanitizer descriptor and recomputes the effective sanitizers.
     */
    protected void removeSanitizer(HtmlSanitizerDescriptor desc) {
        allSanitizers.remove(desc);
        refreshSanitizers();
    }

    /**
     * Rebuilds {@link #sanitizers} from {@link #allSanitizers}.
     * <p>
     * Descriptors are processed in registration order. A later descriptor replaces an earlier one with the same
     * name, and a descriptor that is not enabled only removes the earlier one without being added itself.
     */
    protected void refreshSanitizers() {
        // not very efficient algorithm but who cares?
        sanitizers.clear();
        for (HtmlSanitizerDescriptor sanitizer : allSanitizers) {
            // remove existing with same name
            for (Iterator<HtmlSanitizerDescriptor> it = sanitizers.iterator(); it.hasNext();) {
                HtmlSanitizerDescriptor s = it.next();
                if (s.name.equals(sanitizer.name)) {
                    it.remove();
                    // at most one effective descriptor per name can exist at this point
                    break;
                }
            }
            // add new one if enabled
            if (sanitizer.enabled) {
                sanitizers.add(sanitizer);
            }
        }
    }

    /**
     * @return the effective sanitizers (the live list, not a copy)
     */
    protected List<HtmlSanitizerDescriptor> getSanitizers() {
        return sanitizers;
    }

    // ----- HtmlSanitizerService -----

    /**
     * Sanitizes, in place, the string fields of the document that are designated by the effective sanitizers.
     * <p>
     * A sanitizer applies when its type list is empty or contains the document type. For each of its fields:
     * <ul>
     * <li>if a filter field is configured, the field is processed only when the result of matching the filter
     * property value equals the descriptor's {@code doSanitize()} flag; a missing filter property skips the
     * field,</li>
     * <li>missing properties, {@code null} values and non-string values are skipped,</li>
     * <li>the property is written back only when sanitization changed the value.</li>
     * </ul>
     * If no policy is registered, an error is logged and the document is left untouched.
     *
     * @param doc the document whose fields are sanitized
     */
    @Override
    public void sanitizeDocument(DocumentModel doc) {
        if (policy == null) {
            log.error("Cannot sanitize, no policy registered");
            return;
        }
        for (HtmlSanitizerDescriptor sanitizer : sanitizers) {
            if (!sanitizer.types.isEmpty() && !sanitizer.types.contains(doc.getType())) {
                continue;
            }
            for (FieldDescriptor field : sanitizer.fields) {
                String fieldName = field.getContentField();
                String filterField = field.getFilterField();
                if (filterField != null) {
                    Property filterProp;
                    try {
                        filterProp = doc.getProperty(filterField);
                    } catch (PropertyNotFoundException e) {
                        // the document has no such filter property: this field does not apply
                        continue;
                    }
                    if (field.match(String.valueOf(filterProp.getValue())) != field.doSanitize()) {
                        continue;
                    }
                }
                Property prop;
                try {
                    prop = doc.getProperty(fieldName);
                } catch (PropertyNotFoundException e) {
                    // the document has no such content property: nothing to sanitize
                    continue;
                }
                Serializable value = prop.getValue();
                if (value == null) {
                    continue;
                }
                if (!(value instanceof String)) {
                    log.debug("Cannot sanitize non-string field: " + field);
                    continue;
                }
                String info = "doc " + doc.getPathAsString() + " (" + doc.getId() + ") field " + field;
                String newValue = sanitizeString((String) value, info);
                // avoid touching the property when nothing changed
                if (!newValue.equals(value)) {
                    prop.setValue(newValue);
                }
            }
        }
    }

    /**
     * Sanitizes a string with the effective AntiSamy policy.
     * <p>
     * Messages reported by the scan are logged at debug level. If no policy is registered, or if the scan fails,
     * an error is logged and the input string is returned unchanged.
     *
     * @param string the HTML to sanitize
     * @param info contextual information used in log messages, may be {@code null}
     * @return the cleaned HTML, or the input string when sanitization could not be performed
     */
    @Override
    public String sanitizeString(String string, String info) {
        if (policy == null) {
            log.error("Cannot sanitize, no policy registered");
            return string;
        }
        try {
            CleanResults cr = new AntiSamy().scan(string, policy);
            for (Object err : cr.getErrorMessages()) {
                log.debug(String.format("Sanitizing %s: %s", info == null ? "" : info, err));
            }
            return cr.getCleanHTML();
        } catch (ScanException | PolicyException e) {
            log.error(String.format("Cannot sanitize %s: %s", info == null ? "" : info, e));
            return string;
        }
    }

}