Mercurial > hg > extraction-interface
comparison js/taggingtext.js @ 87:fb5049fc5dd7 extractapp_dev
New:(1)UI(2)generate simple regex by examples(3)coordinates in book metadata
author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 02 Jun 2015 16:52:05 +0200 |
parents | f1f849d31272 |
children | e681d693240e |
comparison
equal
deleted
inserted
replaced
86:0141df465205 | 87:fb5049fc5dd7 |
---|---|
236 } | 236 } |
237 } | 237 } |
238 | 238 |
239 | 239 |
240 function removeTagNewDiv( eventObject, tagName, tagObject ) { | 240 function removeTagNewDiv( eventObject, tagName, tagObject ) { |
241 // TODO: nesting tag representaion | |
242 | |
241 saveUndoText(); | 243 saveUndoText(); |
242 var newdiv = document.createElement("div"); | 244 var newdiv = document.createElement("div"); |
243 newdiv.id = "questionMarkId"; | 245 $(newdiv).id = "questionMarkId"; |
244 newdiv.setAttribute("class", "questionMarkClass"); | 246 $(newdiv).attr("class", "questionMarkClass"); |
245 | 247 |
246 // set z-index to 3 to bring popup tag windwo to front | 248 // set z-index to 3 to bring popup tag windwo to front |
247 newdiv.style.cssText = 'top:'+eventObject.pageY+'; left:'+eventObject.pageX+'; position:absolute; background-color: white; border:1px solid black; padding: 5px; z-index:3'; | 249 newdiv.style.cssText = 'top:'+eventObject.pageY+'; left:'+eventObject.pageX+';'; |
250 | |
248 newdiv.innerHTML = "Tag: "+tagName+"<br>Value: "+tagObject.text()+"<br>"; | 251 newdiv.innerHTML = "Tag: "+tagName+"<br>Value: "+tagObject.text()+"<br>"; |
249 | 252 |
250 var newbutton = $('<button>Remove this</button>').mouseup(function (e2) { | 253 var newbutton = $('<button class="btn btn-default">Remove this</button>').mouseup(function (e2) { |
251 var textKeep = $(this).parent().parent().html(); | 254 var textKeep = $(this).parent().parent().html(); |
252 var regexText=/<div(.*?)<\/div>/g; | 255 var regexText=/<div(.*?)<\/div>/g; |
253 var replaceText=""; | 256 var replaceText=""; |
254 textKeep = textKeep.replace(regexText, replaceText); | 257 textKeep = textKeep.replace(regexText, replaceText); |
255 | 258 |
256 $(this).parent().parent().replaceWith( textKeep ); | 259 $(this).parent().parent().replaceWith( textKeep ); |
257 }); | 260 }); |
258 newbutton.appendTo(newdiv); | 261 newbutton.appendTo(newdiv); |
259 | 262 |
260 var newbutton = $('<button>Remove this(with newline)</button>').mouseup(function (e2) { | 263 var newbutton = $('<button class="btn btn-default">Remove this(with newline)</button>').mouseup(function (e2) { |
261 var textKeep = $(this).parent().parent().html(); | 264 var textKeep = $(this).parent().parent().html(); |
262 var regexText=/<div(.*?)<\/div>/g; | 265 var regexText=/<div(.*?)<\/div>/g; |
263 var replaceText=""; | 266 var replaceText=""; |
264 textKeep = textKeep.replace(regexText, replaceText); | 267 textKeep = textKeep.replace(regexText, replaceText); |
265 | 268 |
269 } | 272 } |
270 $(this).parent().parent().replaceWith( textKeep ); | 273 $(this).parent().parent().replaceWith( textKeep ); |
271 }); | 274 }); |
272 newbutton.appendTo(newdiv); | 275 newbutton.appendTo(newdiv); |
273 | 276 |
274 var newbutton = $('<button>Remove all</button>').mouseup(function (e2) { | 277 var newbutton = $('<button class="btn btn-default">Remove all</button>').mouseup(function (e2) { |
275 var textKeep = $(this).parent().parent().html(); | 278 var textKeep = $(this).parent().parent().html(); |
276 var regexText=/<div(.*?)<\/div>/g; | 279 var regexText=/<div(.*?)<\/div>/g; |
277 var replaceText=""; | 280 var replaceText=""; |
278 textKeep = textKeep.replace(regexText, replaceText); | 281 textKeep = textKeep.replace(regexText, replaceText); |
279 | 282 |
291 } | 294 } |
292 el.innerHTML = str.replace(regexText, replaceText); | 295 el.innerHTML = str.replace(regexText, replaceText); |
293 }); | 296 }); |
294 newbutton.appendTo(newdiv); | 297 newbutton.appendTo(newdiv); |
295 | 298 |
296 var newbutton = $('<button>Remove all(with newline)</button>').mouseup(function (e2) { | 299 var newbutton = $('<button class="btn btn-default">Remove all(with newline)</button>').mouseup(function (e2) { |
297 var textKeep = $(this).parent().parent().html(); | 300 var textKeep = $(this).parent().parent().html(); |
298 var regexText=/<div(.*?)<\/div>/g; | 301 var regexText=/<div(.*?)<\/div>/g; |
299 var replaceText=""; | 302 var replaceText=""; |
300 textKeep = textKeep.replace(regexText, replaceText); | 303 textKeep = textKeep.replace(regexText, replaceText); |
301 | 304 |
462 | 465 |
463 alert( "Tagged "+str.match(regexText).length+" entities!" ); | 466 alert( "Tagged "+str.match(regexText).length+" entities!" ); |
464 } | 467 } |
465 | 468 |
466 function smartRegexNew() { | 469 function smartRegexNew() { |
467 $('#smartRegexPopUpDiv').css("display", "block"); | 470 |
471 var popup_status = $('#smartRegexPopUpDiv').css("display"); | |
472 if (popup_status == "block") { | |
473 $('#smartRegexPopUpDiv').css("display", "none"); | |
474 } else { | |
475 $('#smartRegexPopUpDiv').css("display", "block"); | |
476 } | |
477 | |
468 | 478 |
469 $('#smartRegexPopUpAdd').attr("disabled", false); | 479 $('#smartRegexPopUpAdd').attr("disabled", false); |
470 $('#smartRegexPopUpEdit').attr("disabled", "disabled"); | 480 $('#smartRegexPopUpEdit').attr("disabled", "disabled"); |
471 $('#smartRegexPopUpDel').attr("disabled", "disabled"); | 481 $('#smartRegexPopUpDel').attr("disabled", "disabled"); |
472 $('#smartRegexPopUpBack').attr("disabled", "disabled"); | 482 $('#smartRegexPopUpBack').attr("disabled", "disabled"); |
474 | 484 |
475 } | 485 } |
476 | 486 |
477 function replaceSmartClose() { | 487 function replaceSmartClose() { |
478 $('#smartRegexShowDiv > span').css("border","1px solid black"); | 488 $('#smartRegexShowDiv > span').css("border","1px solid black"); |
479 | |
480 | |
481 $('#smartRegexPopUpDiv').css("display", "none"); | 489 $('#smartRegexPopUpDiv').css("display", "none"); |
482 $("#smartRegexPopUpSelectWord").val("NULL"); | 490 $("#smartRegexPopUpSelectWord").val("NULL"); |
483 $("#smartRegexPopUpText").val(""); | 491 $("#smartRegexPopUpText").val(""); |
484 $("#smartRegexPopUpName").val(""); | 492 $("#smartRegexPopUpName").val(""); |
485 } | 493 } |
543 $('#smartRegexShowDiv').append(newdiv); | 551 $('#smartRegexShowDiv').append(newdiv); |
544 | 552 |
545 replaceSmartClose(); | 553 replaceSmartClose(); |
546 } | 554 } |
547 | 555 |
556 | |
548 $(document).on("click", '#smartRegexShowDiv > span', function (e) { | 557 $(document).on("click", '#smartRegexShowDiv > span', function (e) { |
549 | 558 |
550 | 559 |
551 | 560 |
552 $('#smartRegexPopUpDiv').css("display", "block"); | 561 $('#smartRegexPopUpDiv').css("display", "block"); |
563 $('#smartRegexPopUpEdit').attr("disabled", false); | 572 $('#smartRegexPopUpEdit').attr("disabled", false); |
564 $('#smartRegexPopUpDel').attr("disabled", false); | 573 $('#smartRegexPopUpDel').attr("disabled", false); |
565 $('#smartRegexPopUpBack').attr("disabled", false); | 574 $('#smartRegexPopUpBack').attr("disabled", false); |
566 $('#smartRegexPopUpFor').attr("disabled", false); | 575 $('#smartRegexPopUpFor').attr("disabled", false); |
567 }); | 576 }); |
577 | |
578 function genRegexWindowOpen(){ | |
579 var btn_state = $('#regex_generator').css('display'); | |
580 if (btn_state == "block") { | |
581 $("#regex_generator").css("display", "none"); | |
582 $("#gen_regex_window_open_id").text("Open Gen Regex"); | |
583 } else { | |
584 $('#regex_generator').css("display", "block"); | |
585 $("#gen_regex_window_open_id").text("Close Gen Regex"); | |
586 } | |
587 } | |
588 function genRegexWindowClose(){ | |
589 $('#regex_generator').css("display", "none"); | |
590 } | |
591 | |
592 function sharedStart_(array){ | |
593 var A= array.concat().sort(), | |
594 a1= A[0], a2= A[A.length-1], L= a1.length, i= 0; | |
595 while(i<L && a1.charAt(i)=== a2.charAt(i)) i++; | |
596 return a1.substring(0, i); | |
597 } | |
598 | |
599 function longestCommonSubstring_(string1, string2){ | |
600 // init max value | |
601 var longestCommonSubstring = 0; | |
602 // init 2D array with 0 | |
603 var table = [], | |
604 len1 = string1.length, | |
605 len2 = string2.length, | |
606 row, col; | |
607 | |
608 for(row = 0; row <= len1; row++){ | |
609 table[row] = []; | |
610 for(col = 0; col <= len2; col++){ | |
611 table[row][col] = 0; | |
612 } | |
613 } | |
614 // fill table | |
615 var i, j; | |
616 for(i = 0; i < len1; i++){ | |
617 for(j = 0; j < len2; j++){ | |
618 if(string1[i]==string2[j]){ | |
619 if(table[i][j] == 0){ | |
620 table[i+1][j+1] = 1; | |
621 } else { | |
622 table[i+1][j+1] = table[i][j] + 1; | |
623 } | |
624 if(table[i+1][j+1] > longestCommonSubstring){ | |
625 longestCommonSubstring = table[i+1][j+1]; | |
626 } | |
627 } else { | |
628 table[i+1][j+1] = 0; | |
629 } | |
630 } | |
631 } | |
632 return longestCommonSubstring; | |
633 } | |
634 | |
635 function longestCommonSubstring(s1, s2) { | |
636 | |
637 var start_idx = 0; | |
638 var max_len = 0; | |
639 for (var i = 0; i < s1.length; i++) | |
640 { | |
641 for (var j = 0; j < s2.length; j++) | |
642 { | |
643 var x = 0; | |
644 while (s1.charAt(i + x) == s2.charAt(j + x)) | |
645 { | |
646 x++; | |
647 if ((i + x) >= s1.length || ((j + x) >= s2.length)) | |
648 break; | |
649 } | |
650 if (x > max_len) | |
651 { | |
652 max_len = x; | |
653 start_idx = i; | |
654 } | |
655 } | |
656 } | |
657 return s1.substring(start_idx, (start_idx + max_len)); | |
658 | |
659 } | |
660 | |
661 function getRegex(_pattern) { | |
662 console.log(_pattern[0]); | |
663 console.log(_pattern[1]); | |
664 var p0 = _pattern[0]; | |
665 var p1 = _pattern[1]; | |
666 | |
667 // TODO: find common pattern | |
668 var reg_str = ""; | |
669 // _p1 = 測試 | |
670 // _p2 = 測<tag_name>試</tag_name>一下 | |
671 var combined = []; | |
672 if (p0.length > p1.length) { | |
673 combined = p0; | |
674 } else if(p0.length < p1.length) { | |
675 combined = p1; | |
676 } else { // equal length | |
677 // find matching string | |
678 var cnt = p0.length; | |
679 for (var i = 0; i < cnt; i++) { | |
680 if (p1[i].tag != null) { | |
681 combined.push({tag:p1[i].tag, txt:"[^○如即而之有<>〈〉【】]{1,"+p1[i].txt.length+"}"}); | |
682 } else if (p0[i].tag != null) { | |
683 combined.push({tag:p0[i].tag, txt:"[^○如即而之有<>〈〉【】]{1,"+p0[i].txt.length+"}"}); | |
684 } else { | |
685 // find matching for text in each corresponding position | |
686 var texts = [p0[i].txt, p1[i].txt]; | |
687 var common = longestCommonSubstring(p0[i].txt, p1[i].txt); | |
688 | |
689 /* | |
690 var reg_for_common = "["; | |
691 for (var i = 0; i < common.length; i++) { | |
692 common[i]; | |
693 reg_for_common += common[i]+"|"; | |
694 }; | |
695 reg_for_common += "]"; | |
696 */ | |
697 combined.push({tag:null, txt:common}); | |
698 } | |
699 }; | |
700 } | |
701 | |
702 for (var i = 0; i < combined.length; i++) { | |
703 reg_str += combined[i].txt; | |
704 }; | |
705 | |
706 return reg_str; | |
707 } | |
708 | |
709 var pattern_obj = []; // record pattern array for regex generator. only contain pattern1 and pattern2 | |
710 | |
711 function genRegexBySelection(tag_item_div, _selection) { | |
712 var add_gen_regex_button = document.createElement("button"); | |
713 $(add_gen_regex_button).id = "addToGenRegex"; | |
714 $(add_gen_regex_button).addClass("btn btn-md"); | |
715 $(add_gen_regex_button).click( function(){ | |
716 // popup for selected words regex gen | |
717 console.log("Debug: "); | |
718 console.log(_selection); | |
719 | |
720 if (_selection.type == "Range") { | |
721 // select words, not just click on text | |
722 var anchor_node = _selection.anchorNode; | |
723 var focus_node = _selection.focusNode; | |
724 var sibling_node = anchor_node.nextElementSibling; | |
725 | |
726 | |
727 if (anchor_node && sibling_node && focus_node && container.innerHTML.indexOf( "br" ) == -1) { | |
728 // Chrome can work on this. | |
729 // Safari does not support some of the member in selection object | |
730 // container.innerHTML.indexOf( "br" ) == -1: selection does not contain br. | |
731 | |
732 var seleted_div = document.createElement("div"); | |
733 var seleted_obj = []; // array for selected text as well as its tag if it has any | |
734 | |
735 if (anchor_node == focus_node ) { | |
736 // selected text in plain text | |
737 var text_all = anchor_node.textContent; | |
738 var text_ = text_all.substring(_selection.anchorOffset, _selection.focusOffset); | |
739 $(seleted_div).text(text_); | |
740 seleted_obj.push({tag:null, txt:text_}); // push object into array | |
741 | |
742 } else { | |
743 // selected text contain tags | |
744 var text_before = anchor_node.textContent.substring(_selection.anchorOffset, anchor_node.length); | |
745 var tag_name = sibling_node.nodeName.toLowerCase(); | |
746 var tagged_text = sibling_node.textContent; | |
747 | |
748 var text_after = _selection.focusNode.textContent.substring(0, _selection.focusOffset); | |
749 | |
750 $(seleted_div).text(text_before+tagged_text+text_after); | |
751 seleted_obj.push({tag:null, txt:text_before}); | |
752 seleted_obj.push({tag:tag_name, txt:tagged_text}); | |
753 seleted_obj.push({tag:null, txt:text_after}); | |
754 | |
755 | |
756 console.log(text_before); | |
757 console.log(tag_name); | |
758 console.log(tagged_text); | |
759 console.log(text_after); | |
760 } | |
761 | |
762 | |
763 var generated_regex = ""; | |
764 // show generate regex window | |
765 $('#regex_generator').css("display", "block"); | |
766 $("#gen_regex_window_open_id").text("Close Gen Regex"); | |
767 | |
768 //var seleted_text = String(_selection).replace(/^\s+|\s+$/g,''); | |
769 var pattern1 = $('#regex_pattern1'); | |
770 var pattern2 = $('#regex_pattern2'); | |
771 if (pattern1.children().length == 0) { | |
772 pattern1.append(seleted_div); | |
773 pattern_obj.push(seleted_obj); | |
774 // pattern1.text(seleted_div.text()); | |
775 } else if (pattern2.children().length == 0) { | |
776 pattern2.append(seleted_div); | |
777 pattern_obj.push(seleted_obj); | |
778 //pattern2.text(seleted_div.text()); | |
779 generated_regex = getRegex(pattern_obj); | |
780 | |
781 } else { | |
782 // pattern1 and pattern2 are already having text | |
783 pattern1.children().remove(); | |
784 pattern1.append(pattern2.children()); | |
785 | |
786 pattern2.children().remove(); | |
787 pattern2.append(seleted_div); | |
788 | |
789 pattern_obj.shift(); | |
790 pattern_obj.push(seleted_obj); | |
791 | |
792 //pattern1.text(pattern2.text()); | |
793 //pattern2.text(seleted_div); | |
794 generated_regex = getRegex(pattern_obj); | |
795 | |
796 } | |
797 $('#generated_regex').text(generated_regex); | |
798 // --- | |
799 | |
800 $('#regex_generator_error_msg').text(""); | |
801 } else { | |
802 $('#regex_generator_error_msg').text("Note: Not a valid selection for regex generator."); | |
803 } | |
804 | |
805 $('.tagItemDivClass').remove(); // close the tag window | |
806 | |
807 } else if (_selection.type == "Caret") { | |
808 // TODO: click on tagged text case rather than select | |
809 // If do this process, also need to consider between browers since not all of them support | |
810 // and also need to modify pop_remove_tag_window | |
811 } | |
812 }); | |
813 | |
814 $(add_gen_regex_button).text("Add to Gen Regex"); | |
815 tag_item_div.appendChild(add_gen_regex_button); | |
816 } | |
817 | |
568 | 818 |
569 function smartRegexEmpty() { | 819 function smartRegexEmpty() { |
570 $('#smartRegexShowDiv').html(""); | 820 $('#smartRegexShowDiv').html(""); |
571 regex_element_index = 0; | 821 regex_element_index = 0; |
572 } | 822 } |
787 } | 1037 } |
788 } | 1038 } |
789 | 1039 |
790 function smartRegexLoad(topic_id) { | 1040 function smartRegexLoad(topic_id) { |
791 $('#load_regex_div').html(""); | 1041 $('#load_regex_div').html(""); |
792 $('#load_regex_div').css("display", "block"); | 1042 var popup_status = $('#load_regex_div').css("display"); |
1043 if (popup_status == "block") { | |
1044 $('#load_regex_div').css("display", "none"); | |
1045 } else { | |
1046 $('#load_regex_div').css("display", "block"); | |
1047 } | |
793 | 1048 |
794 var newselect = document.createElement("select"); | 1049 var newselect = document.createElement("select"); |
795 newselect.id = "loadRegexSelect"; | 1050 newselect.id = "loadRegexSelect"; |
796 | 1051 |
797 $.ajax({ | 1052 $.ajax({ |