001/* 002 * (C) Copyright 2002-2014 Nuxeo SA (http://nuxeo.com/) and contributors. 003 * 004 * All rights reserved. This program and the accompanying materials 005 * are made available under the terms of the GNU Lesser General Public License 006 * (LGPL) version 2.1 which accompanies this distribution, and is available at 007 * http://www.gnu.org/licenses/lgpl-2.1.html 008 * 009 * This library is distributed in the hope that it will be useful, 010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 012 * Lesser General Public License for more details. 013 * 014 * Contributors: 015 * Nuxeo - initial API and implementation 016 * 017 */ 018 019package org.nuxeo.ecm.platform.importer.xml.parser; 020 021import java.io.File; 022import java.io.IOException; 023import java.io.InputStream; 024import java.io.Serializable; 025import java.util.ArrayList; 026import java.util.Collections; 027import java.util.HashMap; 028import java.util.List; 029import java.util.Map; 030import java.util.Stack; 031 032import org.apache.commons.io.FileUtils; 033import org.apache.commons.logging.Log; 034import org.apache.commons.logging.LogFactory; 035import org.dom4j.Attribute; 036import org.dom4j.Document; 037import org.dom4j.DocumentException; 038import org.dom4j.Element; 039import org.dom4j.InvalidXPathException; 040import org.dom4j.Node; 041import org.dom4j.Text; 042import org.dom4j.io.SAXReader; 043import org.dom4j.tree.DefaultText; 044import org.mvel2.MVEL; 045import org.nuxeo.common.utils.ZipUtils; 046import org.nuxeo.ecm.automation.AutomationService; 047import org.nuxeo.ecm.automation.OperationContext; 048import org.nuxeo.ecm.automation.OperationException; 049import org.nuxeo.ecm.core.api.Blob; 050import org.nuxeo.ecm.core.api.Blobs; 051import org.nuxeo.ecm.core.api.CoreSession; 052import org.nuxeo.ecm.core.api.DocumentModel; 053import org.nuxeo.ecm.core.api.DocumentNotFoundException; 054import org.nuxeo.ecm.core.api.DocumentRef; 055import org.nuxeo.ecm.core.api.NuxeoException; 056import org.nuxeo.ecm.core.api.PathRef; 057import org.nuxeo.ecm.core.api.model.Property; 058import org.nuxeo.ecm.core.api.model.impl.primitives.BlobProperty; 059import org.nuxeo.ecm.core.schema.types.ListType; 060import org.nuxeo.runtime.api.Framework; 061 062/** 063 * Main implementation class for delivering the Import logic 064 * 065 * @author <a href="mailto:tdelprat@nuxeo.com">Tiry</a> 066 */ 067public class XMLImporterServiceImpl { 068 069 private static final String MSG_NO_ELEMENT_FOUND = "**CREATION**\n" 070 + "No element \"%s\" found in %s, use the DOC_TYPE-INDEX value"; 071 072 private static final String MSG_CREATION = "**CREATION**\n" 073 + "Try to create document in %s with name %s based on \"%s\" fragment " + "with the following conf: %s\n"; 074 075 private static final String MSG_UPDATE = "**DOCUMENT UPDATE**\n" 076 + "Try to update document in %s with name %s based on \"%s\" fragment " + "with the following conf: %s\n"; 077 078 private static final String MSG_UPDATE_PROPERTY_TRACE = "**PROPERTY UPDATE**\n" 079 + "Value found for %s in %s is \"%s\". With the following conf: %s"; 080 081 private static final String MSG_UPDATE_PROPERTY = "**PROPERTY UPDATE**\n" 082 + "Try to set value into %s property based on %s element on document \"%s\" (%s). Conf activated: %s"; 083 084 public static final Log log = LogFactory.getLog(XMLImporterServiceImpl.class); 085 086 public static final String XML_IMPORTER_INITIALIZATION = "org.nuxeo.xml.importer.initialization"; 087 088 protected CoreSession session; 089 090 protected DocumentModel rootDoc; 091 092 protected Stack<DocumentModel> docsStack; 093 094 protected Map<String, List<String>> deletedAttributes = new HashMap<>(); 095 096 protected Map<String, Object> mvelCtx = new HashMap<>(); 097 098 protected Map<Element, DocumentModel> elToDoc = new HashMap<>(); 099 100 protected ParserConfigRegistry registry; 101 102 protected Boolean deferSave = false; 103 104 public XMLImporterServiceImpl(DocumentModel rootDoc, ParserConfigRegistry registry) { 105 this(rootDoc, registry, null, false); 106 } 107 108 public XMLImporterServiceImpl(DocumentModel rootDoc, ParserConfigRegistry registry, Map<String, Object> mvelContext, boolean deferSave) { 109 if (mvelContext != null) { 110 mvelCtx.putAll(mvelContext); 111 } 112 113 session = rootDoc.getCoreSession(); 114 this.rootDoc = rootDoc; 115 this.deferSave = deferSave; 116 117 docsStack = new Stack<>(); 118 pushInStack(rootDoc); 119 mvelCtx.put("root", rootDoc); 120 mvelCtx.put("docs", docsStack); 121 mvelCtx.put("session", session); 122 123 this.registry = registry; 124 } 125 126 protected ParserConfigRegistry getRegistry() { 127 return registry; 128 } 129 130 protected DocConfigDescriptor getDocCreationConfig(Element el) { 131 for (DocConfigDescriptor conf : getRegistry().getDocCreationConfigs()) { 132 // direct tagName match 133 if (conf.getTagName().equals(el.getName())) { 134 return conf; 135 } else { 136 // try xpath match 137 try { 138 if (el.matches(conf.getTagName())) { 139 return conf; 140 } 141 } catch (InvalidXPathException e) { 142 // NOP 143 } 144 } 145 } 146 return null; 147 } 148 149 protected List<AttributeConfigDescriptor> getAttributConfigs(Element el) { 150 List<AttributeConfigDescriptor> result = new ArrayList<>(); 151 for (AttributeConfigDescriptor conf : getRegistry().getAttributConfigs()) { 152 if (conf.getTagName().equals(el.getName())) { 153 result.add(conf); 154 } else { 155 // try xpath match 156 try { 157 if (el.matches(conf.getTagName())) { 158 result.add(conf); 159 } 160 } catch (InvalidXPathException e) { 161 // NOP 162 } 163 } 164 } 165 return result; 166 } 167 168 protected File workingDirectory; 169 170 private AutomationService automationService; 171 172 public List<DocumentModel> parse(InputStream is) throws IOException { 173 mvelCtx.put("source", is); 174 try { 175 Document doc; 176 doc = new SAXReader().read(is); 177 workingDirectory = null; 178 return parse(doc); 179 } catch (DocumentException e) { 180 throw new IOException(e); 181 } 182 } 183 184 public List<DocumentModel> parse(File file) throws IOException { 185 mvelCtx.put("source", file); 186 187 Document doc = null; 188 File directory = null; 189 try { 190 doc = new SAXReader().read(file); 191 workingDirectory = file.getParentFile(); 192 } catch (DocumentException e) { 193 File tmp = new File(System.getProperty("java.io.tmpdir")); 194 directory = new File(tmp, file.getName() + System.currentTimeMillis()); 195 directory.mkdir(); 196 ZipUtils.unzip(file, directory); 197 for (File child : directory.listFiles()) { 198 if (child.getName().endsWith(".xml")) { 199 return parse(child); 200 } 201 } 202 throw new NuxeoException("Can not find XML file inside the zip archive", e); 203 } finally { 204 FileUtils.deleteQuietly(directory); 205 } 206 return parse(doc); 207 } 208 209 public List<DocumentModel> parse(Document doc) { 210 Element root = doc.getRootElement(); 211 elToDoc = new HashMap<>(); 212 mvelCtx.put("xml", doc); 213 mvelCtx.put("map", elToDoc); 214 process(root); 215 216 // defer saveDocument to end of operation 217 if (deferSave) { 218 ArrayList<DocumentModel> a = new ArrayList<DocumentModel>(); 219 DocumentModel d = null; 220 while(docsStack.size()>0){ 221 d = popStack(); 222 d.putContextData(XML_IMPORTER_INITIALIZATION, Boolean.TRUE); 223 d = session.saveDocument(d); 224 a.add(d); 225 } 226 return a; 227 } else { 228 return new ArrayList<>(docsStack); 229 } 230 } 231 232 protected Object resolveComplex(Element el, AttributeConfigDescriptor conf) { 233 Map<String, Object> propValue = new HashMap<>(); 234 for (String name : conf.getMapping().keySet()) { 235 propValue.put(name, resolveAndEvaluateXmlNode(el, conf.getMapping().get(name))); 236 } 237 238 return propValue; 239 } 240 241 protected Blob resolveBlob(Element el, AttributeConfigDescriptor conf) { 242 @SuppressWarnings("unchecked") 243 Map<String, Object> propValues = (Map<String, Object>) resolveComplex(el, conf); 244 245 if (propValues.containsKey("content")) { 246 try { 247 Blob blob = null; 248 String content = (String) propValues.get("content"); 249 if (content != null && workingDirectory != null) { 250 File file = new File(workingDirectory, content.trim()); 251 if (file.exists()) { 252 blob = Blobs.createBlob(file); 253 } 254 } 255 if (blob == null) { 256 blob = Blobs.createBlob((String) propValues.get("content")); 257 } 258 if (propValues.containsKey("mimetype")) { 259 blob.setMimeType((String) propValues.get("mimetype")); 260 } 261 if (propValues.containsKey("filename")) { 262 blob.setFilename((String) propValues.get("filename")); 263 } 264 return blob; 265 } catch (IOException e) { 266 throw new RuntimeException(e); 267 } 268 } 269 return null; 270 } 271 272 protected void processDocAttributes(DocumentModel doc, Element el, AttributeConfigDescriptor conf) { 273 String targetDocProperty = conf.getTargetDocProperty(); 274 275 if (log.isDebugEnabled()) { 276 log.debug(String.format(MSG_UPDATE_PROPERTY, targetDocProperty, el.getUniquePath(), doc.getPathAsString(), 277 doc.getType(), conf.toString())); 278 } 279 Property property = doc.getProperty(targetDocProperty); 280 281 if (property.isScalar()) { 282 Object value = resolveAndEvaluateXmlNode(el, conf.getSingleXpath()); 283 if (log.isTraceEnabled()) { 284 log.trace(String.format(MSG_UPDATE_PROPERTY_TRACE, targetDocProperty, el.getUniquePath(), value, 285 conf.toString())); 286 } 287 property.setValue(value); 288 289 } else if (property.isComplex()) { 290 291 if (property instanceof BlobProperty) { 292 Object value = resolveBlob(el, conf); 293 if (log.isTraceEnabled()) { 294 log.trace(String.format(MSG_UPDATE_PROPERTY_TRACE, targetDocProperty, el.getUniquePath(), value, 295 conf.toString())); 296 } 297 property.setValue(value); 298 } else { 299 Object value = resolveComplex(el, conf); 300 if (log.isTraceEnabled()) { 301 log.trace(String.format(MSG_UPDATE_PROPERTY_TRACE, targetDocProperty, el.getUniquePath(), value, 302 conf.toString())); 303 } 304 property.setValue(value); 305 } 306 307 } else if (property.isList()) { 308 309 ListType lType = (ListType) property.getType(); 310 311 Serializable value; 312 313 if (lType.getFieldType().isSimpleType()) { 314 value = (Serializable) resolveAndEvaluateXmlNode(el, conf.getSingleXpath()); 315 if (value != null) { 316 Object values = property.getValue(); 317 if (values instanceof List) { 318 ((List) values).add(value); 319 property.setValue(values); 320 } else if (values instanceof Object[]) { 321 List<Object> valuesList = new ArrayList<>(); 322 Collections.addAll(valuesList, (Object[]) property.getValue()); 323 valuesList.add(value); 324 property.setValue(valuesList.toArray()); 325 } else { 326 log.error("Simple multi value property " + targetDocProperty 327 + " is neither a List nor an Array"); 328 } 329 } 330 } else { 331 value = (Serializable) resolveComplex(el, conf); 332 if (value != null && !conf.getMapping().isEmpty()) { 333 property.addValue(value); 334 } 335 } 336 337 if (log.isTraceEnabled()) { 338 log.trace(String.format(MSG_UPDATE_PROPERTY_TRACE, targetDocProperty, el.getUniquePath(), value, 339 conf.toString())); 340 } 341 } 342 } 343 344 protected Map<String, Object> getMVELContext(Element el) { 345 mvelCtx.put("currentDocument", docsStack.peek()); 346 mvelCtx.put("currentElement", el); 347 mvelCtx.put("Fn", new MVELImporterFunction(session, docsStack, elToDoc, el)); 348 return mvelCtx; 349 } 350 351 protected Object resolve(Element el, String xpr) { 352 if (xpr == null) { 353 return null; 354 } 355 356 if (xpr.startsWith("#{") && xpr.endsWith("}")) { // MVEL 357 xpr = xpr.substring(2, xpr.length() - 1); 358 return resolveMVEL(el, xpr); 359 } else if (xpr.contains("{{")) { // String containing XPaths 360 StringBuffer sb = new StringBuffer(); 361 int idx = xpr.indexOf("{{"); 362 while (idx >= 0) { 363 int idx2 = xpr.indexOf("}}", idx); 364 if (idx2 > 0) { 365 sb.append(xpr.substring(0, idx)); 366 String xpath = xpr.substring(idx + 2, idx2); 367 sb.append(resolveAndEvaluateXmlNode(el, xpath)); 368 xpr = xpr.substring(idx2); 369 } else { 370 sb.append(xpr); 371 xpr = ""; 372 } 373 idx = xpr.indexOf("{{"); 374 } 375 return sb.toString(); 376 } else { 377 return resolveXP(el, xpr); // default to pure XPATH 378 } 379 } 380 381 protected Object resolveMVEL(Element el, String xpr) { 382 Map<String, Object> ctx = new HashMap<>(getMVELContext(el)); 383 Serializable compiled = MVEL.compileExpression(xpr); 384 return MVEL.executeExpression(compiled, ctx); 385 } 386 387 protected Object resolveXP(Element el, String xpr) { 388 List<Object> nodes = el.selectNodes(xpr); 389 if (nodes.size() == 1) { 390 return nodes.get(0); 391 } else if (nodes.size() > 1) { 392 // Workaround for NXP-11834 393 if (xpr.endsWith("text()")) { 394 String value = ""; 395 for (Object node : nodes) { 396 if (!(node instanceof DefaultText)) { 397 String msg = "Text selector must return a string (expr:\"%s\") element %s"; 398 log.error(String.format(msg, xpr, el.getStringValue())); 399 return value; 400 } 401 value += ((DefaultText) node).getText(); 402 } 403 return new DefaultText(value); 404 } 405 return nodes; 406 } 407 return null; 408 } 409 410 protected String resolvePath(Element el, String xpr) { 411 Object ob = resolve(el, xpr); 412 if (ob == null) { 413 for (int i = 0; i < docsStack.size(); i++) { 414 if (docsStack.get(i).isFolder()) { 415 return docsStack.get(i).getPathAsString(); 416 } 417 } 418 } else { 419 if (ob instanceof DocumentModel) { 420 return ((DocumentModel) ob).getPathAsString(); 421 } else if (ob instanceof Node) { 422 if (ob instanceof Element) { 423 Element targetElement = (Element) ob; 424 DocumentModel target = elToDoc.get(targetElement); 425 if (target != null) { 426 return target.getPathAsString(); 427 } else { 428 return targetElement.getText(); 429 } 430 } else if (ob instanceof Attribute) { 431 return ((Attribute) ob).getValue(); 432 } else if (ob instanceof Text) { 433 return ((Text) ob).getText(); 434 } else if (ob.getClass().isAssignableFrom(Attribute.class)) { 435 return ((Attribute) ob).getValue(); 436 } 437 } else { 438 return ob.toString(); 439 } 440 } 441 return rootDoc.getPathAsString(); 442 } 443 444 protected String resolveName(Element el, String xpr) { 445 Object ob = resolveAndEvaluateXmlNode(el, xpr); 446 if (ob == null) { 447 return null; 448 } 449 return ob.toString(); 450 } 451 452 protected Object resolveAndEvaluateXmlNode(Element el, String xpr) { 453 Object ob = resolve(el, xpr); 454 if (ob == null) { 455 return null; 456 } 457 if (ob instanceof Node) { 458 return ((Node) ob).getText(); 459 } else { 460 return ob; 461 } 462 } 463 464 protected void createNewDocument(Element el, DocConfigDescriptor conf) { 465 DocumentModel doc = session.createDocumentModel(conf.getDocType()); 466 467 String path = resolvePath(el, conf.getParent()); 468 Object nameOb = resolveName(el, conf.getName()); 469 String name = null; 470 if (nameOb == null) { 471 if (log.isDebugEnabled()) { 472 log.debug(String.format(MSG_NO_ELEMENT_FOUND, conf.getName(), el.getUniquePath())); 473 } 474 int idx = 1; 475 for (int i = 0; i < docsStack.size(); i++) { 476 if (docsStack.get(i).getType().equals(conf.getDocType())) { 477 idx++; 478 } 479 } 480 name = conf.getDocType() + "-" + idx; 481 } else { 482 name = nameOb.toString(); 483 } 484 doc.setPathInfo(path, name); 485 486 if (log.isDebugEnabled()) { 487 if (conf.getUpdate()){ 488 log.debug(String.format(MSG_UPDATE, path, name, el.getUniquePath(), conf.toString())); 489 } else { 490 log.debug(String.format(MSG_CREATION, path, name, el.getUniquePath(), conf.toString())); 491 } 492 } 493 494 try { 495 if (conf.getUpdate() && session.exists(doc.getRef())){ 496 DocumentModel existingDoc = session.getDocument(doc.getRef()); 497 498 // get attributes, if attribute needs to be overwritten, empty in the document 499 for (Object e : el.elements()) { 500 List<AttributeConfigDescriptor> configs = getAttributConfigs((Element) e); 501 if (configs != null) { 502 if (!deletedAttributes.containsKey(existingDoc.getId())){ 503 deletedAttributes.put(existingDoc.getId(), new ArrayList<String>()); 504 } 505 for (AttributeConfigDescriptor config : configs) { 506 String targetDocProperty = config.getTargetDocProperty(); 507 // check deletedAttributes for attribute which should be overwritten 508 // if it is there, don't empty it a second time 509 if (config.overwrite && !deletedAttributes.get(existingDoc.getId()).contains(targetDocProperty)){ 510 deletedAttributes.get(existingDoc.getId()).add(targetDocProperty); 511 existingDoc.setPropertyValue(targetDocProperty,new ArrayList<>()); 512 } 513 } 514 } 515 } 516 doc = existingDoc; 517 } else { 518 doc = session.createDocument(doc); 519 } 520 } catch (NuxeoException e) { 521 e.addInfo(String.format(MSG_CREATION, path, name, el.getUniquePath(), conf.toString())); 522 throw e; 523 } 524 pushInStack(doc); 525 elToDoc.put(el, doc); 526 } 527 528 protected void process(Element el) { 529 DocConfigDescriptor createConf = getDocCreationConfig(el); 530 if (createConf != null) { 531 createNewDocument(el, createConf); 532 } 533 List<AttributeConfigDescriptor> configs = getAttributConfigs(el); 534 if (configs != null) { 535 for (AttributeConfigDescriptor config : configs) { 536 processDocAttributes(docsStack.peek(), el, config); 537 } 538 539 DocumentModel doc = popStack(); 540 doc.putContextData(XML_IMPORTER_INITIALIZATION, Boolean.TRUE); 541 if (!deferSave) { 542 doc = session.saveDocument(doc); 543 } 544 pushInStack(doc); 545 546 if (createConf != null) { 547 String chain = createConf.getAutomationChain(); 548 if (chain != null && !"".equals(chain.trim())) { 549 OperationContext ctx = new OperationContext(session, mvelCtx); 550 ctx.setInput(docsStack.peek()); 551 try { 552 getAutomationService().run(ctx, chain); 553 } catch (OperationException e) { 554 throw new NuxeoException(e); 555 } 556 } 557 } 558 } 559 for (Object e : el.elements()) { 560 process((Element) e); 561 } 562 } 563 564 private AutomationService getAutomationService() { 565 if (automationService == null) { 566 automationService = Framework.getLocalService(AutomationService.class); 567 } 568 return automationService; 569 570 } 571 572 private void pushInStack(DocumentModel doc) { 573 mvelCtx.put("changeableDocument", doc); 574 docsStack.push(doc); 575 } 576 577 private DocumentModel popStack() { 578 DocumentModel doc = docsStack.pop(); 579 mvelCtx.put("changeableDocument", doc); 580 return doc; 581 } 582 583}