001/*
002 * (C) Copyright 2002-2014 Nuxeo SA (http://nuxeo.com/) and contributors.
003 *
004 * All rights reserved. This program and the accompanying materials
005 * are made available under the terms of the GNU Lesser General Public License
006 * (LGPL) version 2.1 which accompanies this distribution, and is available at
007 * http://www.gnu.org/licenses/lgpl-2.1.html
008 *
009 * This library is distributed in the hope that it will be useful,
010 * but WITHOUT ANY WARRANTY; without even the implied warranty of
011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
012 * Lesser General Public License for more details.
013 *
014 * Contributors:
015 *     Nuxeo - initial API and implementation
016 *
017 */
018
019package org.nuxeo.ecm.platform.importer.xml.parser;
020
021import java.io.File;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.Serializable;
025import java.util.ArrayList;
026import java.util.Collections;
027import java.util.HashMap;
028import java.util.List;
029import java.util.Map;
030import java.util.Stack;
031
032import org.apache.commons.io.FileUtils;
033import org.apache.commons.logging.Log;
034import org.apache.commons.logging.LogFactory;
035import org.dom4j.Attribute;
036import org.dom4j.Document;
037import org.dom4j.DocumentException;
038import org.dom4j.Element;
039import org.dom4j.InvalidXPathException;
040import org.dom4j.Node;
041import org.dom4j.Text;
042import org.dom4j.io.SAXReader;
043import org.dom4j.tree.DefaultText;
044import org.mvel2.MVEL;
045import org.nuxeo.common.utils.ZipUtils;
046import org.nuxeo.ecm.automation.AutomationService;
047import org.nuxeo.ecm.automation.OperationContext;
048import org.nuxeo.ecm.automation.OperationException;
049import org.nuxeo.ecm.core.api.Blob;
050import org.nuxeo.ecm.core.api.Blobs;
051import org.nuxeo.ecm.core.api.CoreSession;
052import org.nuxeo.ecm.core.api.DocumentModel;
053import org.nuxeo.ecm.core.api.DocumentNotFoundException;
054import org.nuxeo.ecm.core.api.DocumentRef;
055import org.nuxeo.ecm.core.api.NuxeoException;
056import org.nuxeo.ecm.core.api.PathRef;
057import org.nuxeo.ecm.core.api.model.Property;
058import org.nuxeo.ecm.core.api.model.impl.primitives.BlobProperty;
059import org.nuxeo.ecm.core.schema.types.ListType;
060import org.nuxeo.runtime.api.Framework;
061
062/**
063 * Main implementation class for delivering the Import logic
064 *
065 * @author <a href="mailto:tdelprat@nuxeo.com">Tiry</a>
066 */
067public class XMLImporterServiceImpl {
068
069    private static final String MSG_NO_ELEMENT_FOUND = "**CREATION**\n"
070            + "No element \"%s\" found in %s, use the DOC_TYPE-INDEX value";
071
072    private static final String MSG_CREATION = "**CREATION**\n"
073            + "Try to create document in %s with name %s based on \"%s\" fragment " + "with the following conf: %s\n";
074
075    private static final String MSG_UPDATE = "**DOCUMENT UPDATE**\n"
076            + "Try to update document in %s with name %s based on \"%s\" fragment " + "with the following conf: %s\n";
077    
078    private static final String MSG_UPDATE_PROPERTY_TRACE = "**PROPERTY UPDATE**\n"
079            + "Value found for %s in %s is \"%s\". With the following conf: %s";
080
081    private static final String MSG_UPDATE_PROPERTY = "**PROPERTY UPDATE**\n"
082            + "Try to set value into %s property based on %s element on document \"%s\" (%s). Conf activated: %s";
083
084    public static final Log log = LogFactory.getLog(XMLImporterServiceImpl.class);
085
086    public static final String XML_IMPORTER_INITIALIZATION = "org.nuxeo.xml.importer.initialization";
087
088    protected CoreSession session;
089
090    protected DocumentModel rootDoc;
091
092    protected Stack<DocumentModel> docsStack;
093    
094    protected Map<String, List<String>> deletedAttributes = new HashMap<>();
095
096    protected Map<String, Object> mvelCtx = new HashMap<>();
097
098    protected Map<Element, DocumentModel> elToDoc = new HashMap<>();
099
100    protected ParserConfigRegistry registry;
101    
102    protected Boolean deferSave = false;
103
104    public XMLImporterServiceImpl(DocumentModel rootDoc, ParserConfigRegistry registry) {
105        this(rootDoc, registry, null, false);
106    }
107
108    public XMLImporterServiceImpl(DocumentModel rootDoc, ParserConfigRegistry registry, Map<String, Object> mvelContext, boolean deferSave) {
109        if (mvelContext != null) {
110            mvelCtx.putAll(mvelContext);
111        }
112
113        session = rootDoc.getCoreSession();
114        this.rootDoc = rootDoc;
115        this.deferSave = deferSave;
116        
117        docsStack = new Stack<>();
118        pushInStack(rootDoc);
119        mvelCtx.put("root", rootDoc);
120        mvelCtx.put("docs", docsStack);
121        mvelCtx.put("session", session);
122
123        this.registry = registry;
124    }
125
126    protected ParserConfigRegistry getRegistry() {
127        return registry;
128    }
129
130    protected DocConfigDescriptor getDocCreationConfig(Element el) {
131        for (DocConfigDescriptor conf : getRegistry().getDocCreationConfigs()) {
132            // direct tagName match
133            if (conf.getTagName().equals(el.getName())) {
134                return conf;
135            } else {
136                // try xpath match
137                try {
138                    if (el.matches(conf.getTagName())) {
139                        return conf;
140                    }
141                } catch (InvalidXPathException e) {
142                    // NOP
143                }
144            }
145        }
146        return null;
147    }
148
149    protected List<AttributeConfigDescriptor> getAttributConfigs(Element el) {
150        List<AttributeConfigDescriptor> result = new ArrayList<>();
151        for (AttributeConfigDescriptor conf : getRegistry().getAttributConfigs()) {
152            if (conf.getTagName().equals(el.getName())) {
153                result.add(conf);
154            } else {
155                // try xpath match
156                try {
157                    if (el.matches(conf.getTagName())) {
158                        result.add(conf);
159                    }
160                } catch (InvalidXPathException e) {
161                    // NOP
162                }
163            }
164        }
165        return result;
166    }
167
168    protected File workingDirectory;
169
170    private AutomationService automationService;
171
172    public List<DocumentModel> parse(InputStream is) throws IOException {
173        mvelCtx.put("source", is);
174        try {
175            Document doc;
176            doc = new SAXReader().read(is);
177            workingDirectory = null;
178            return parse(doc);
179        } catch (DocumentException e) {
180            throw new IOException(e);
181        }
182    }
183
184    public List<DocumentModel> parse(File file) throws IOException {
185        mvelCtx.put("source", file);
186
187        Document doc = null;
188        File directory = null;
189        try {
190            doc = new SAXReader().read(file);
191            workingDirectory = file.getParentFile();
192        } catch (DocumentException e) {
193            File tmp = new File(System.getProperty("java.io.tmpdir"));
194            directory = new File(tmp, file.getName() + System.currentTimeMillis());
195            directory.mkdir();
196            ZipUtils.unzip(file, directory);
197            for (File child : directory.listFiles()) {
198                if (child.getName().endsWith(".xml")) {
199                    return parse(child);
200                }
201            }
202            throw new NuxeoException("Can not find XML file inside the zip archive", e);
203        } finally {
204            FileUtils.deleteQuietly(directory);
205        }
206        return parse(doc);
207    }
208
209    public List<DocumentModel> parse(Document doc) {
210        Element root = doc.getRootElement();
211        elToDoc = new HashMap<>();
212        mvelCtx.put("xml", doc);
213        mvelCtx.put("map", elToDoc);
214        process(root);
215        
216        // defer saveDocument to end of operation
217        if (deferSave) {
218                ArrayList<DocumentModel> a = new ArrayList<DocumentModel>();
219                DocumentModel d = null;
220                while(docsStack.size()>0){
221                        d = popStack();
222                        d.putContextData(XML_IMPORTER_INITIALIZATION, Boolean.TRUE);
223                        d = session.saveDocument(d);
224                        a.add(d);
225                }        
226                return a;
227        } else {
228                return new ArrayList<>(docsStack);
229        }
230    }
231
232    protected Object resolveComplex(Element el, AttributeConfigDescriptor conf) {
233        Map<String, Object> propValue = new HashMap<>();
234        for (String name : conf.getMapping().keySet()) {
235            propValue.put(name, resolveAndEvaluateXmlNode(el, conf.getMapping().get(name)));
236        }
237
238        return propValue;
239    }
240
241    protected Blob resolveBlob(Element el, AttributeConfigDescriptor conf) {
242        @SuppressWarnings("unchecked")
243        Map<String, Object> propValues = (Map<String, Object>) resolveComplex(el, conf);
244
245        if (propValues.containsKey("content")) {
246            try {
247                Blob blob = null;
248                String content = (String) propValues.get("content");
249                if (content != null && workingDirectory != null) {
250                    File file = new File(workingDirectory, content.trim());
251                    if (file.exists()) {
252                        blob = Blobs.createBlob(file);
253                    }
254                }
255                if (blob == null) {
256                    blob = Blobs.createBlob((String) propValues.get("content"));
257                }
258                if (propValues.containsKey("mimetype")) {
259                    blob.setMimeType((String) propValues.get("mimetype"));
260                }
261                if (propValues.containsKey("filename")) {
262                    blob.setFilename((String) propValues.get("filename"));
263                }
264                return blob;
265            } catch (IOException e) {
266                throw new RuntimeException(e);
267            }
268        }
269        return null;
270    }
271
272    protected void processDocAttributes(DocumentModel doc, Element el, AttributeConfigDescriptor conf) {
273        String targetDocProperty = conf.getTargetDocProperty();
274
275        if (log.isDebugEnabled()) {
276            log.debug(String.format(MSG_UPDATE_PROPERTY, targetDocProperty, el.getUniquePath(), doc.getPathAsString(),
277                    doc.getType(), conf.toString()));
278        }
279        Property property = doc.getProperty(targetDocProperty);
280
281        if (property.isScalar()) {
282            Object value = resolveAndEvaluateXmlNode(el, conf.getSingleXpath());
283            if (log.isTraceEnabled()) {
284                log.trace(String.format(MSG_UPDATE_PROPERTY_TRACE, targetDocProperty, el.getUniquePath(), value,
285                        conf.toString()));
286            }
287            property.setValue(value);
288
289        } else if (property.isComplex()) {
290
291            if (property instanceof BlobProperty) {
292                Object value = resolveBlob(el, conf);
293                if (log.isTraceEnabled()) {
294                    log.trace(String.format(MSG_UPDATE_PROPERTY_TRACE, targetDocProperty, el.getUniquePath(), value,
295                            conf.toString()));
296                }
297                property.setValue(value);
298            } else {
299                Object value = resolveComplex(el, conf);
300                if (log.isTraceEnabled()) {
301                    log.trace(String.format(MSG_UPDATE_PROPERTY_TRACE, targetDocProperty, el.getUniquePath(), value,
302                            conf.toString()));
303                }
304                property.setValue(value);
305            }
306
307        } else if (property.isList()) {
308
309            ListType lType = (ListType) property.getType();
310
311            Serializable value;
312
313            if (lType.getFieldType().isSimpleType()) {
314                value = (Serializable) resolveAndEvaluateXmlNode(el, conf.getSingleXpath());
315                if (value != null) {
316                    Object values = property.getValue();
317                    if (values instanceof List) {
318                        ((List) values).add(value);
319                        property.setValue(values);
320                    } else if (values instanceof Object[]) {
321                        List<Object> valuesList = new ArrayList<>();
322                        Collections.addAll(valuesList, (Object[]) property.getValue());
323                        valuesList.add(value);
324                        property.setValue(valuesList.toArray());
325                    } else {
326                        log.error("Simple multi value property " + targetDocProperty
327                                + " is neither a List nor an Array");
328                    }
329                }
330            } else {
331                value = (Serializable) resolveComplex(el, conf);
332                if (value != null && !conf.getMapping().isEmpty()) {
333                    property.addValue(value);
334                }
335            }
336
337            if (log.isTraceEnabled()) {
338                log.trace(String.format(MSG_UPDATE_PROPERTY_TRACE, targetDocProperty, el.getUniquePath(), value,
339                        conf.toString()));
340            }
341        }
342    }
343
344    protected Map<String, Object> getMVELContext(Element el) {
345        mvelCtx.put("currentDocument", docsStack.peek());
346        mvelCtx.put("currentElement", el);
347        mvelCtx.put("Fn", new MVELImporterFunction(session, docsStack, elToDoc, el));
348        return mvelCtx;
349    }
350
351    protected Object resolve(Element el, String xpr) {
352        if (xpr == null) {
353            return null;
354        }
355
356        if (xpr.startsWith("#{") && xpr.endsWith("}")) { // MVEL
357            xpr = xpr.substring(2, xpr.length() - 1);
358            return resolveMVEL(el, xpr);
359        } else if (xpr.contains("{{")) { // String containing XPaths
360            StringBuffer sb = new StringBuffer();
361            int idx = xpr.indexOf("{{");
362            while (idx >= 0) {
363                int idx2 = xpr.indexOf("}}", idx);
364                if (idx2 > 0) {
365                    sb.append(xpr.substring(0, idx));
366                    String xpath = xpr.substring(idx + 2, idx2);
367                    sb.append(resolveAndEvaluateXmlNode(el, xpath));
368                    xpr = xpr.substring(idx2);
369                } else {
370                    sb.append(xpr);
371                    xpr = "";
372                }
373                idx = xpr.indexOf("{{");
374            }
375            return sb.toString();
376        } else {
377            return resolveXP(el, xpr); // default to pure XPATH
378        }
379    }
380
381    protected Object resolveMVEL(Element el, String xpr) {
382        Map<String, Object> ctx = new HashMap<>(getMVELContext(el));
383        Serializable compiled = MVEL.compileExpression(xpr);
384        return MVEL.executeExpression(compiled, ctx);
385    }
386
387    protected Object resolveXP(Element el, String xpr) {
388        List<Object> nodes = el.selectNodes(xpr);
389        if (nodes.size() == 1) {
390            return nodes.get(0);
391        } else if (nodes.size() > 1) {
392            // Workaround for NXP-11834
393            if (xpr.endsWith("text()")) {
394                String value = "";
395                for (Object node : nodes) {
396                    if (!(node instanceof DefaultText)) {
397                        String msg = "Text selector must return a string (expr:\"%s\") element %s";
398                        log.error(String.format(msg, xpr, el.getStringValue()));
399                        return value;
400                    }
401                    value += ((DefaultText) node).getText();
402                }
403                return new DefaultText(value);
404            }
405            return nodes;
406        }
407        return null;
408    }
409
410    protected String resolvePath(Element el, String xpr) {
411        Object ob = resolve(el, xpr);
412        if (ob == null) {
413            for (int i = 0; i < docsStack.size(); i++) {
414                if (docsStack.get(i).isFolder()) {
415                    return docsStack.get(i).getPathAsString();
416                }
417            }
418        } else {
419            if (ob instanceof DocumentModel) {
420                return ((DocumentModel) ob).getPathAsString();
421            } else if (ob instanceof Node) {
422                if (ob instanceof Element) {
423                    Element targetElement = (Element) ob;
424                    DocumentModel target = elToDoc.get(targetElement);
425                    if (target != null) {
426                        return target.getPathAsString();
427                    } else {
428                        return targetElement.getText();
429                    }
430                } else if (ob instanceof Attribute) {
431                    return ((Attribute) ob).getValue();
432                } else if (ob instanceof Text) {
433                    return ((Text) ob).getText();
434                } else if (ob.getClass().isAssignableFrom(Attribute.class)) {
435                    return ((Attribute) ob).getValue();
436                }
437            } else {
438                return ob.toString();
439            }
440        }
441        return rootDoc.getPathAsString();
442    }
443
444    protected String resolveName(Element el, String xpr) {
445        Object ob = resolveAndEvaluateXmlNode(el, xpr);
446        if (ob == null) {
447            return null;
448        }
449        return ob.toString();
450    }
451
452    protected Object resolveAndEvaluateXmlNode(Element el, String xpr) {
453        Object ob = resolve(el, xpr);
454        if (ob == null) {
455            return null;
456        }
457        if (ob instanceof Node) {
458            return ((Node) ob).getText();
459        } else {
460            return ob;
461        }
462    }
463
464    protected void createNewDocument(Element el, DocConfigDescriptor conf) {
465        DocumentModel doc = session.createDocumentModel(conf.getDocType());
466
467        String path = resolvePath(el, conf.getParent());
468        Object nameOb = resolveName(el, conf.getName());
469        String name = null;
470        if (nameOb == null) {
471                if (log.isDebugEnabled()) {
472                        log.debug(String.format(MSG_NO_ELEMENT_FOUND, conf.getName(), el.getUniquePath()));
473                }
474                int idx = 1;
475                for (int i = 0; i < docsStack.size(); i++) {
476                        if (docsStack.get(i).getType().equals(conf.getDocType())) {
477                                idx++;
478                        }
479                }
480                name = conf.getDocType() + "-" + idx;
481        } else {
482                name = nameOb.toString();
483        }
484        doc.setPathInfo(path, name);
485
486        if (log.isDebugEnabled()) {
487                if (conf.getUpdate()){
488                        log.debug(String.format(MSG_UPDATE, path, name, el.getUniquePath(), conf.toString()));
489                } else {
490                        log.debug(String.format(MSG_CREATION, path, name, el.getUniquePath(), conf.toString()));
491                }
492        }
493
494        try {
495                if (conf.getUpdate() && session.exists(doc.getRef())){
496                        DocumentModel existingDoc = session.getDocument(doc.getRef());
497                        
498                        // get attributes, if attribute needs to be overwritten, empty in the document
499                        for (Object e : el.elements()) {
500                                List<AttributeConfigDescriptor> configs = getAttributConfigs((Element) e);
501                                if (configs != null) {
502                                                if (!deletedAttributes.containsKey(existingDoc.getId())){
503                                                        deletedAttributes.put(existingDoc.getId(), new ArrayList<String>());
504                                                }
505                                        for (AttributeConfigDescriptor config : configs) {
506                                                        String targetDocProperty = config.getTargetDocProperty();
507                                                // check deletedAttributes for attribute which should be overwritten
508                                                        // if it is there, don't empty it a second time
509                                                        if (config.overwrite && !deletedAttributes.get(existingDoc.getId()).contains(targetDocProperty)){
510                                                        deletedAttributes.get(existingDoc.getId()).add(targetDocProperty); 
511                                                        existingDoc.setPropertyValue(targetDocProperty,new ArrayList<>());
512                                                }
513                                        }
514                                } 
515                        }
516                        doc = existingDoc;
517                } else {
518                        doc = session.createDocument(doc);
519                }
520        } catch (NuxeoException e) {
521                e.addInfo(String.format(MSG_CREATION, path, name, el.getUniquePath(), conf.toString()));
522                throw e;
523        }
524        pushInStack(doc);
525        elToDoc.put(el, doc);
526    }
527
528    protected void process(Element el) {
529        DocConfigDescriptor createConf = getDocCreationConfig(el);
530        if (createConf != null) {
531                createNewDocument(el, createConf);
532        }
533        List<AttributeConfigDescriptor> configs = getAttributConfigs(el);
534        if (configs != null) {
535            for (AttributeConfigDescriptor config : configs) {
536                processDocAttributes(docsStack.peek(), el, config);
537            }
538
539            DocumentModel doc = popStack();
540            doc.putContextData(XML_IMPORTER_INITIALIZATION, Boolean.TRUE);
541            if (!deferSave) {
542                doc = session.saveDocument(doc);
543            }
544            pushInStack(doc);
545
546            if (createConf != null) {
547                String chain = createConf.getAutomationChain();
548                if (chain != null && !"".equals(chain.trim())) {
549                    OperationContext ctx = new OperationContext(session, mvelCtx);
550                    ctx.setInput(docsStack.peek());
551                    try {
552                        getAutomationService().run(ctx, chain);
553                    } catch (OperationException e) {
554                        throw new NuxeoException(e);
555                    }
556                }
557            }
558        }
559        for (Object e : el.elements()) {
560            process((Element) e);
561        }
562    }
563
564    private AutomationService getAutomationService() {
565        if (automationService == null) {
566            automationService = Framework.getLocalService(AutomationService.class);
567        }
568        return automationService;
569
570    }
571
572    private void pushInStack(DocumentModel doc) {
573        mvelCtx.put("changeableDocument", doc);
574        docsStack.push(doc);
575    }
576
577    private DocumentModel popStack() {
578        DocumentModel doc = docsStack.pop();
579        mvelCtx.put("changeableDocument", doc);
580        return doc;
581    }
582    
583}