001/*
002 * (C) Copyright 2006-2010 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Florent Guillaume
018 */
019package org.nuxeo.ecm.platform.htmlsanitizer;
020
021import java.io.IOException;
022import java.io.Serializable;
023import java.net.URL;
024import java.util.ArrayList;
025import java.util.Iterator;
026import java.util.LinkedList;
027import java.util.List;
028
029import org.apache.commons.logging.Log;
030import org.apache.commons.logging.LogFactory;
031import org.nuxeo.ecm.core.api.DocumentModel;
032import org.nuxeo.ecm.core.api.NuxeoException;
033import org.nuxeo.ecm.core.api.model.Property;
034import org.nuxeo.ecm.core.api.model.PropertyNotFoundException;
035import org.nuxeo.runtime.model.ComponentInstance;
036import org.nuxeo.runtime.model.DefaultComponent;
037import org.owasp.html.HtmlPolicyBuilder;
038import org.owasp.html.PolicyFactory;
039
040/**
041 * Service that sanitizes some HMTL fields to remove potential cross-site scripting attacks in them.
042 */
043public class HtmlSanitizerServiceImpl extends DefaultComponent implements HtmlSanitizerService {
044
045    private static final Log log = LogFactory.getLog(HtmlSanitizerServiceImpl.class);
046
047    public static final String ANTISAMY_XP = "antisamy";
048
049    public static final String SANITIZER_XP = "sanitizer";
050
051    /** All policies registered. */
052    public LinkedList<HtmlSanitizerAntiSamyDescriptor> allPolicies = new LinkedList<HtmlSanitizerAntiSamyDescriptor>();
053
054    /** Effective policy. */
055    public PolicyFactory policy;
056
057    /** All sanitizers registered. */
058    public List<HtmlSanitizerDescriptor> allSanitizers = new ArrayList<HtmlSanitizerDescriptor>(1);
059
060    /** Effective sanitizers. */
061    public List<HtmlSanitizerDescriptor> sanitizers = new ArrayList<HtmlSanitizerDescriptor>(1);
062
063    @Override
064    public void registerContribution(Object contribution, String extensionPoint, ComponentInstance contributor) {
065        if (ANTISAMY_XP.equals(extensionPoint)) {
066            if (!(contribution instanceof HtmlSanitizerAntiSamyDescriptor)) {
067                log.error("Contribution " + contribution + " is not of type "
068                        + HtmlSanitizerAntiSamyDescriptor.class.getName());
069                return;
070            }
071            HtmlSanitizerAntiSamyDescriptor desc = (HtmlSanitizerAntiSamyDescriptor) contribution;
072            log.info("Registering AntiSamy policy: " + desc.policy);
073            addAntiSamy(desc);
074        } else if (SANITIZER_XP.equals(extensionPoint)) {
075            if (!(contribution instanceof HtmlSanitizerDescriptor)) {
076                log.error("Contribution " + contribution + " is not of type " + HtmlSanitizerDescriptor.class.getName());
077                return;
078            }
079            HtmlSanitizerDescriptor desc = (HtmlSanitizerDescriptor) contribution;
080            log.info("Registering HTML sanitizer: " + desc);
081            addSanitizer(desc);
082        } else {
083            log.error("Contribution extension point should be '" + SANITIZER_XP + "' but is: " + extensionPoint);
084        }
085    }
086
087    @Override
088    public void unregisterContribution(Object contribution, String extensionPoint, ComponentInstance contributor) {
089        if (ANTISAMY_XP.equals(extensionPoint)) {
090            if (!(contribution instanceof HtmlSanitizerAntiSamyDescriptor)) {
091                return;
092            }
093            HtmlSanitizerAntiSamyDescriptor desc = (HtmlSanitizerAntiSamyDescriptor) contribution;
094            log.info("Unregistering AntiSamy policy: " + desc.policy);
095            removeAntiSamy(desc);
096        } else if (SANITIZER_XP.equals(extensionPoint)) {
097            if (!(contribution instanceof HtmlSanitizerDescriptor)) {
098                return;
099            }
100            HtmlSanitizerDescriptor desc = (HtmlSanitizerDescriptor) contribution;
101            log.info("Unregistering HTML sanitizer: " + desc);
102            removeSanitizer(desc);
103        }
104    }
105
106    protected void addAntiSamy(HtmlSanitizerAntiSamyDescriptor desc) {
107        if (Thread.currentThread().getContextClassLoader().getResourceAsStream(desc.policy) == null) {
108            log.error("Cannot find AntiSamy policy: " + desc.policy);
109            return;
110        }
111        allPolicies.add(desc);
112        refreshPolicy();
113    }
114
115    protected void removeAntiSamy(HtmlSanitizerAntiSamyDescriptor desc) {
116        allPolicies.remove(desc);
117        refreshPolicy();
118    }
119
120    protected void refreshPolicy() {
121        if (allPolicies.isEmpty()) {
122            policy = null;
123        } else {
124            HtmlSanitizerAntiSamyDescriptor desc = allPolicies.removeLast();
125            URL url = Thread.currentThread().getContextClassLoader().getResource(desc.policy);
126            HtmlPolicyBuilder builder = new HtmlPolicyBuilder();
127            try {
128                builder.loadAntiSamyPolicy(url);
129                initializeBuilder(builder);
130                policy = builder.toFactory();
131            } catch (IOException e) {
132                policy = null;
133                throw new NuxeoException("Cannot parse AntiSamy policy: " + desc.policy, e);
134            }
135        }
136    }
137
138    protected void initializeBuilder(HtmlPolicyBuilder builder) {
139        builder.allowStandardUrlProtocols();
140        builder.allowStyling();
141        builder.disallowElements("script");
142    }
143
144    protected void addSanitizer(HtmlSanitizerDescriptor desc) {
145        if (desc.fields.isEmpty()) {
146            log.error("Sanitizer has no fields: " + desc);
147            return;
148        }
149        allSanitizers.add(desc);
150        refreshSanitizers();
151    }
152
153    protected void removeSanitizer(HtmlSanitizerDescriptor desc) {
154        allSanitizers.remove(desc);
155        refreshSanitizers();
156    }
157
158    protected void refreshSanitizers() {
159        // not very efficient algorithm but who cares?
160        sanitizers.clear();
161        for (HtmlSanitizerDescriptor sanitizer : allSanitizers) {
162            // remove existing with same name
163            for (Iterator<HtmlSanitizerDescriptor> it = sanitizers.iterator(); it.hasNext();) {
164                HtmlSanitizerDescriptor s = it.next();
165                if (s.name.equals(sanitizer.name)) {
166                    it.remove();
167                    break;
168                }
169            }
170            // add new one if enabled
171            if (sanitizer.enabled) {
172                sanitizers.add(sanitizer);
173            }
174        }
175    }
176
177    protected List<HtmlSanitizerDescriptor> getSanitizers() {
178        return sanitizers;
179    }
180
181    // ----- HtmlSanitizerService -----
182
183    @Override
184    public void sanitizeDocument(DocumentModel doc) {
185        if (policy == null) {
186            log.error("Cannot sanitize, no policy registered");
187            return;
188        }
189        for (HtmlSanitizerDescriptor sanitizer : sanitizers) {
190            if (!sanitizer.types.isEmpty() && !sanitizer.types.contains(doc.getType())) {
191                continue;
192            }
193            for (FieldDescriptor field : sanitizer.fields) {
194                String fieldName = field.getContentField();
195                String filterField = field.getFilterField();
196                if (filterField != null) {
197                    Property filterProp;
198                    try {
199                        filterProp = doc.getProperty(filterField);
200                    } catch (PropertyNotFoundException e) {
201                        continue;
202                    }
203                    if (field.match(String.valueOf(filterProp.getValue())) != field.doSanitize()) {
204                        continue;
205                    }
206                }
207                Property prop;
208                try {
209                    prop = doc.getProperty(fieldName);
210                } catch (PropertyNotFoundException e) {
211                    continue;
212                }
213                Serializable value = prop.getValue();
214                if (value == null) {
215                    continue;
216                }
217                if (!(value instanceof String)) {
218                    log.debug("Cannot sanitize non-string field: " + field);
219                    continue;
220                }
221                String info = "doc " + doc.getPathAsString() + " (" + doc.getId() + ") field " + field;
222                String newValue = sanitizeString((String) value, info);
223                if (!newValue.equals(value)) {
224                    prop.setValue(newValue);
225                }
226            }
227        }
228    }
229
230    @Override
231    public String sanitizeString(String string, String info) {
232        if (policy == null) {
233            log.error("Cannot sanitize, no policy registered");
234            return string;
235        }
236        return policy.sanitize(string);
237    }
238
239}